Source code for sciris.sc_utils

"""
Miscellaneous utilities for type checking, printing, dates and times, etc.

Note: there are a lot! The design philosophy has been that it's easier to
ignore a function that you don't need than write one from scratch that you
do need.

Highlights:
    - :func:`sc.dcp() <dcp>`: shortcut to :func:`copy.deepcopy()`, plus optional robust copying
    - :func:`sc.pp() <pp>`: shortcut to :func:`pprint.pprint()`
    - :func:`sc.isnumber() <isnumber>`: checks if something is any number type
    - :func:`sc.tolist() <tolist>`: converts any object to a list, for easy iteration
    - :func:`sc.toarray() <toarray>`: tries to convert any object to an array, for easy use with numpy
    - :func:`sc.mergedicts() <mergedicts>`: merges any set of inputs into a dictionary
    - :func:`sc.mergelists() <mergelists>`: merges any set of inputs into a list
    - :func:`sc.runcommand() <runcommand>`: simple way of executing a shell command
    - :func:`sc.download() <download>`: download multiple URLs in parallel
"""

##############################################################################
#%% Imports
##############################################################################
import re
import sys
import types
import copy
import json
import string
import numbers
import pprint
import hashlib
import getpass
import inspect
import warnings
import functools
import importlib
import subprocess
import unicodedata
import numpy as np
import pandas as pd
import random as rnd
import uuid as py_uuid
import contextlib as cl
import traceback as py_traceback
from collections import abc
from pathlib import Path
import sciris as sc

# Handle types
_stringtypes = (str, bytes, bytearray)
_numtype     = (numbers.Number,)
_booltypes   = (bool, np.bool_)

# Store these for access by other modules
__all__ = ['_stringtypes', '_numtype', '_booltypes']


##############################################################################
#%% Adaptations from other libraries
##############################################################################

# Define the modules being loaded
__all__ += ['fast_uuid', 'uuid', 'cp', 'dcp', 'robust_dcp', 'pp', 'sha', 'traceback', 'getuser',
           'getplatform', 'iswindows', 'islinux', 'ismac', 'isjupyter', 'asciify']



[docs]
def fast_uuid(which=None, length=None, n=1, secure=False, forcelist=False, safety=1000, recursion=0, recursion_limit=10, verbose=True):
    """
    Create a fast UID or set of UIDs. Note: for certain applications, :func:`sc.uuid() <uuid>`
    is faster than :func:`sc.fast_uuid() <fast_uuid>`!

    Args:
        which (str): the set of characters to choose from (default ascii)
        length (int): length of UID (default 6)
        n (int): number of UIDs to generate
        secure (bool): whether to generate random numbers from sources provided by the operating system
        forcelist (bool): whether or not to return a list even for a single UID (used for recursive calls)
        safety (float): ensure that the space of possible UIDs is at least this much larger than the number requested
        recursion (int): the recursion level of the call (since the function calls itself if not all UIDs are unique)
        recursion_limit (int): Maximum number of times to try regeneraring keys
        verbose (bool): whether to show progress

    Returns:
        uid (str or list): a string UID, or a list of string UIDs

    **Example**::

        uuids = sc.fast_uuid(n=100) # Generate 100 UUIDs

    Inspired by https://stackoverflow.com/questions/2257441/random-string-generation-with-upper-case-letters-and-digits/30038250#30038250
    """

    # Set defaults
    if which  is None: which  = 'ascii'
    if length is None: length = 6
    length = int(length)
    n = int(n)

    choices = {
        'lowercase':    string.ascii_lowercase,
        'letters':      string.ascii_letters,
        'numeric':      string.digits,
        'digits':       string.digits,
        'hex':          string.hexdigits.lower(),
        'hexdigits':    string.hexdigits.lower(),
        'alphanumeric': string.ascii_lowercase + string.digits,
        'ascii':        string.ascii_letters + string.digits,
        }

    if which not in choices: # pragma: no cover
        errormsg = f'Choice {which} not found; choices are: {strjoin(choices.keys())}'
        raise KeyError(errormsg)
    else:
        charlist = choices[which]

    # Check that there are enough options
    if n > 1:
        n_possibilities = len(charlist)**length
        allowed = n_possibilities//safety
        if n > allowed:
            errormsg = f'With a UID of type "{which}" and length {length}, there are {n_possibilities} possible UIDs, and you requested {n}, which exceeds the maximum allowed ({allowed})'
            raise ValueError(errormsg)

    # Secure uses system random which is secure, but >10x slower
    if secure: # pragma: no cover
        choices_func = rnd.SystemRandom().choices
    else:
        choices_func = rnd.choices

    # Generate the UUID(s) string as one big block
    uid_str = ''.join(choices_func(charlist, k=length*n))

    # Parse if n==1
    if n == 1:
        if forcelist:
            output = [uid_str]
        else:
            output = uid_str

    # Otherwise, we're generating multiple, so do additional checking to ensure they're actually unique
    else:
        # Split from one long string into multiple and check length
        output = [uid_str[chunk*length:(chunk+1)*length] for chunk in range(len(uid_str)//length)]
        n_unique_keys = len(dict.fromkeys(output))

        # Check that length is correct, i.e. no duplicates!
        while n_unique_keys != n:

            # Set recursion and do error checking
            recursion += 1
            if recursion > recursion_limit:
                errormsg = f'Could only generate {n_unique_keys}/{n} unique UIDs after {recursion_limit} tries: please increase UID length or character set size to ensure more unique options'
                raise ValueError(errormsg)
            if verbose:
                print(f'Warning: duplicates found in UID list ({n_unique_keys}/{n} unique); regenerating...')

            # Extend the list of UIDs
            new_n = n - n_unique_keys
            new_uuids = fast_uuid(which=which, length=length, n=new_n, secure=secure, safety=safety, recursion=recursion, recursion_limit=recursion_limit, verbose=verbose, forcelist=True)
            output.extend(new_uuids)
            n_unique_keys = len(dict.fromkeys(output)) # Recalculate the number of keys

    return output




[docs]
def uuid(uid=None, which=None, die=False, tostring=False, length=None, n=1, **kwargs):
    """
    Shortcut for creating a UUID; default is to create a UUID4. Can also convert a UUID.

    Args:
        uid (str or uuid): if a string, convert to an actual UUID; otherwise, return unchanged
        which (int or str): if int, choose a Python UUID function; otherwise, generate a random alphanumeric string (default 4)
        die (bool): whether to fail for converting a supplied uuid (default False)
        tostring (bool): whether or not to return a string instead of a UUID object (default False)
        length (int): number of characters to trim to, if returning a string
        n (int): number of UUIDs to generate; if n>1, return a list

    Returns:
        uid (UUID or str): the UID object

    **Examples**::

        sc.uuid() # Alias to uuid.uuid4()
        sc.uuid(which='hex') # Creates a length-6 hex string
        sc.uuid(which='ascii', length=10, n=50) # Creates 50 UUIDs of length 10 each using the full ASCII character set
    """

    # Set default UUID type
    if which is None:
        which = 4
    n = int(n)

    # Choose the different functions
    if   which==1: uuid_func = py_uuid.uuid1
    elif which==3: uuid_func = py_uuid.uuid3
    elif which==4: uuid_func = py_uuid.uuid4
    elif which==5: uuid_func = py_uuid.uuid5
    else:
        return fast_uuid(which=which, length=length, n=n, **kwargs) # ...or just go to fast_uuid()

    # If a UUID was supplied, try to parse it
    if uid is not None:
        try:
            if isinstance(uid, py_uuid.UUID):
                output = uid # Use directly
            else: # Convert
                output = py_uuid.UUID(uid)
        except Exception as E: # pragma: no cover
            errormsg = f'Could not convert "{uid}" to a UID ({repr(E)})'
            if die:
                raise TypeError(errormsg)
            else:
                print(errormsg)
                uid = None # Just create a new one

    # If not, make a new one
    if uid is None:
        uuid_list = []
        for i in range(n): # Loop over
            uid = uuid_func(**kwargs)  # If not supplied, create a new UUID

            # Convert to a string, and optionally trim
            if tostring or length:
                uid = str(uid)
            if length:
                if length<len(uid):
                    uid = uid[:length]
                else:
                    errormsg = f'Cannot choose first {length} chars since UID has length {len(uid)}'
                    raise ValueError(errormsg)
            uuid_list.append(uid)

        # Process the output: string if 1, list if more
        if len(uuid_list) == 1:
            output = uuid_list[0]
        else:
            output = uuid_list

    return output




[docs]
def cp(obj, die=True):
    """
    Shortcut to perform a shallow copy operation

    Almost identical to :func:`copy.copy()`, but optionally allow failures

    | *New in version 2.0.0:* default die=True instead of False
    | *New in version 3.1.4:* "verbose" argument removed; warning raised
    """
    try:
        output = copy.copy(obj)
    except Exception as E:
        output = obj
        errormsg = f'Could not perform shallow copy of {type(obj)}'
        if die:
            raise ValueError(errormsg) from E
        else:
            warnmsg = errormsg + ', returning original object...'
            warnings.warn(warnmsg, category=RuntimeWarning, stacklevel=2)
    return output




[docs]
def dcp(obj, memo=None, die=True, verbose=True):
    """
    Shortcut to perform a deep copy operation

    Almost identical to :func:`copy.deepcopy()`, but optionally fall back to :func:`copy.copy()`
    if deepcopy fails.

    Args:
        die (bool): if False, fall back to :func:`copy.copy()`

    | *New in version 2.0.0:* default die=True instead of False
    | *New in version 3.1.4:* die=False passed to `sc.cp()`; "verbose" argument removed; warning raised
    | *New in version 3.2.0:* "memo" argument
    | *New in verison 3.2.4:* fall back to `sc.robust_dcp()` instead of `sc.cp()`; "verbose" argument added
    """
    try:
        output = copy.deepcopy(obj, memo=memo)
    except Exception as E: # pragma: no cover
        errormsg = f'Could not perform deep copy of {type(obj)}:\n{E}'
        if die:
            errormsg += '\n\nUse sc.dcp(obj, die=False) to perform a robust copy instead.'
            raise ValueError(errormsg) from E
        else:
            output = robust_dcp(obj, verbose=verbose, _memo=memo)
            if verbose:
                warnmsg = errormsg + '\nPerforming robust copy instead...'
                warnings.warn(warnmsg, category=RuntimeWarning, stacklevel=2)
    return output




[docs]
def robust_dcp(obj, _memo=None, verbose=False):
    """
    Ultra-robust deepcopying

    Deep-copy anything that can be deep-copied, then try a shallow copy of that
    attribute, and otherwise return the original object.

    Co-authored with ChatGPT.

    | *New in version 3.2.4.*
    """
    # Immutable primitives that never need copying
    primitives = (int, float, complex, bool, str, bytes, tuple, frozenset, types.NoneType)
    exceptions = [] # Print exceptions if verbose=True

    def print_exc(exceptions, obj):
        msg = f'Encountered {len(exceptions)} exceptions while copying:\n{obj}\n'
        line = '—'*60 + '\n'
        for i,exc in enumerate(exceptions):
            msg += line
            msg += f'Exception {i+1}\n'
            msg += line
            msg += f'{exc}\n\n'
        print(msg)

    if _memo is None:
        _memo = {}
    obj_id = id(obj)
    if obj_id in _memo: # preserve reference cycles
        return _memo[obj_id]

    # Fast-path primitives (immutable, safe to reuse)
    if isinstance(obj, primitives):
        return obj

    # First, try vanilla deepcopy
    try:
        dup = copy.deepcopy(obj, memo=_memo)
        _memo[obj_id] = dup
        return dup
    except Exception as e: # fall through and attempt element-wise copy
        if verbose: exceptions.append(e)

    # Containers
    kw = dict(verbose=verbose, _memo=_memo)
    if isinstance(obj, abc.Mapping):
        dup = obj.__class__((robust_dcp(k, **kw), robust_dcp(v, **kw)) for k, v in obj.items())
    elif isinstance(obj, (abc.Sequence, abc.Set)) and not isinstance(obj, (str, bytes, bytearray)):
        dup = obj.__class__(robust_dcp(x, verbose=verbose, _memo=_memo) for x in obj)
    else: # Plain custom object: allocate blank instance, then copy attributes
        dup = object.__new__(obj.__class__)
        _memo[obj_id] = dup  # stash early to handle self-refs
        try:
            attrs = vars(obj)  # works for classes with __dict__
        except TypeError as e:      # maybe it's using __slots__ only
            attrs = {slot: getattr(obj, slot) for slot in getattr(obj, "__slots__", [])}
            if verbose: exceptions.append(e)

        for name, value in attrs.items():
            try:
                copied_val = robust_dcp(value, **kw)
            except Exception as e:
                if verbose: exceptions.append(e)
                try: # Shallow fallback on *this* attribute
                    copied_val = copy.copy(value)
                except Exception as e:
                    copied_val = value
                    if verbose: exceptions.append(e)
            try:
                setattr(dup, name, copied_val)
            except AttributeError as e: # read-only attribute: leave original reference
                if verbose: exceptions.append(e)

        if verbose: print_exc(exceptions, obj)
        return dup

    if verbose: print_exc(exceptions, obj)
    _memo[obj_id] = dup
    return dup



def _printout(string=None, doprint=None, output=False):
    """
    Short for "print or output string". Not for public use.

    Private helper function to handle the logic of two boolean flags: by default, print;
    but if output is requested, return the output and do not print; unless doprint
    is set to True.
    """
    # Default for doprint is opposite of output
    if doprint is None: doprint = not(output)

    # Handle actions/output
    if doprint: print(string)
    if output: return string
    else:      return



[docs]
def pp(obj, jsonify=False, doprint=None, output=False, sort_dicts=False, **kwargs):
    """
    Shortcut for pretty-printing the object.

    Almost identical to :func:`pprint.pprint()`, but can also be used as an alias for
    :func:`pprint.pformat()`.

    Args:
        obj     (any):  object to print
        jsonify (bool): whether to first convert the object to JSON, to handle things like ordered dicts nicely
        doprint (bool): whether to show output (default true)
        output  (bool): whether to return output as a string (default false)
        sort_dicts (bool): whether to sort dictionary keys (default false, unlike :func:`pprint.pprint()`)
        kwargs  (dict): passed to :func:`pprint.pprint()`

    **Example**::

        d = {'my very': {'large': 'and', 'unwieldy': {'nested': 'dictionary', 'cannot': 'be', 'easily': 'printed'}}}
        sc.pp(d)

    *New in version 1.3.1:* output argument
    *New in version 3.0.0:* "jsonify" defaults to False; sort_dicts defaults to False; removed "verbose" argument
    """

    # Get object
    if jsonify:
        try:
            toprint = json.loads(json.dumps(obj, default=lambda x: f'{x}')) # This is to handle things like OrderedDicts
        except Exception as E: # pragma: no cover
            print(f'Could not jsonify object ("{str(E)}"), printing default...')
            toprint = obj # If problems are encountered, just return the object
    else: # pragma: no cover
        toprint = obj

    # Decide what to do with object
    string = pprint.pformat(toprint, sort_dicts=sort_dicts, **kwargs)
    return _printout(string=string, doprint=doprint, output=output)




[docs]
def sha(obj, digest=False, asint=False, encoding='utf-8'):
    """
    Shortcut for the standard hashing (SHA) method

    Equivalent to :obj:`hashlib.sha224()`.

    Args:
        obj (any): the object to be hashed; if not a string, converted to one
        digest (bool): if True, return the hex digest instead of the hash object
        asint (bool): if True, return the (very large) integer corresponding to the hex digest
        encoding (str): the encoding to use

    **Example**::

        sha1 = sc.sha(dict(foo=1, bar=2), True)
        sha2 = sc.sha(dict(foo=1, bar=2), digest=True)
        sha3 = sc.sha(dict(foo=1, bar=3), digest=True)
        assert sha1 == sha2
        assert sha2 != sha3

    | *New in version 3.2.0:* "asint" argument; changed argument order
    """
    # Prepare argument
    if not isinstance(obj, (str, bytes)): # Ensure it's actually a string
        obj = repr(obj) # More robust than str()
    if isinstance(obj, str): # If it's unicode, encode it to bytes first
        obj = obj.encode(encoding)

    # Do the hashing and prepare the output
    out = hashlib.sha224(obj)
    if digest or asint:
        out = out.hexdigest()
        if asint:
            out = int(out, 16) # Base-16 integer
    return out




[docs]
def traceback(exc=None, value=None, tb=None, verbose=False, *args, **kwargs):
    """
    Shortcut for accessing the traceback

    Alias for :obj:`traceback.format_exc()`.

    If no argument is provided, then use the last exception encountered.

    Args:
        exc (Exception, tuple/list, or type): the exception to get the traceback from
        value (Exception): the actual exception
        tb (Traceback): the traceback
        verbose (bool): whether to print the exception

    **Examples**::

        # Use automatic exception info
        mylist = [0,1]
        try:
            mylist[2]
        except:
            print(f'Error: {sc.traceback()}')

        # Supply exception manually (also illustrating sc.tryexcept())
        with sc.tryexcept() as te1:
            dict(a=3)['b']

        with sc.tryexcept() as te2:
            [0,1][2]

        tb1 = sc.traceback(te1.exception)
        tb2 = sc.traceback(te2.exception)
        print(f'Tracebacks were:\n\n{tb1}\n{tb2}')
    """
    if exc is not None:
        if isinstance(exc, Exception): # Usual case: an exception is supplied
            exc_info = (exc.__class__, exc, exc.__traceback__)
        elif isinstance(exc, (tuple, list)): # Alternately, all three are supplied as a list # pragma: no cover
            exc_info = exc
        elif value is not None and tb is not None: # ... or separately # pragma: no cover
            exc_info = (exc, value, tb)
        else: # pragma: no cover
            errormsg = f'Unexpected exception type "{type(exc)}": expecting Exception, or list/tuple of exc_info'
            raise TypeError(errormsg)
        out = ''.join(py_traceback.format_exception(*exc_info, **kwargs))
    else:
        out = py_traceback.format_exc(*args, **kwargs)
    if verbose: # pragma: no cover
        print(out)
    return out




[docs]
def getuser():
    """
    Get the current username

    Alias to :func:`getpass.getuser()` -- see https://docs.python.org/3/library/getpass.html#getpass.getuser

    **Example**::

        sc.getuser()

    *New in version 3.0.0.*
    """
    return getpass.getuser()




[docs]
def getplatform(expected=None, platform=None, die=False):
    """
    Return the name of the current "main" platform (e.g. 'mac')

    Alias to ``sys.platform``, except maps entries onto one of 'linux', 'windows',
    'mac', or 'other'.

    Args:
        expected (str): if not None, check if the current platform is this
        platform (str): if supplied, map this onto one of the "main" platforms, rather than determine it
        die (bool): if True and expected is defined, raise an exception

    Returns:
        String, one of: 'linux', 'windows', 'mac', or 'other'

    **Examples**::

        sc.getplatform() # Get current name of platform
        sc.getplatform('windows', die=True) # Raise an exception if not on Windows
        sc.getplatform(platform='darwin') # Normalize to 'mac'
    """
    # Define different aliases for each operating system
    mapping = dict(
        linux   = ['linux', 'posix'],
        windows = ['windows', 'win', 'win32', 'cygwin', 'nt'],
        mac     = ['mac', 'macos', 'darwin', 'osx']
    )

    # Check to see what system it is
    sys_plat = sys.platform if platform is None else platform
    plat = 'other'
    for key,aliases in mapping.items():
        if sys_plat.lower() in aliases:
            plat = key
            break

    # Handle output
    if expected is not None:
        output = (expected.lower() in mapping[plat]) # Check if it's as expecte
        if not output and die: # pragma: no cover
            errormsg = f'System is "{plat}", not "{expected}"'
            raise EnvironmentError(errormsg)
    else: # pragma: no cover
        output = plat
    return output




[docs]
def iswindows(die=False):
    """ Alias to :func:`sc.getplatform('windows') <getplatform>` """
    return getplatform('windows', die=die)



[docs]
def islinux(die=False):
    """ Alias to :func:`sc.getplatform('linux') <getplatform>` """
    return getplatform('linux', die=die)



[docs]
def ismac(die=False):
    """ Alias to :func:`sc.getplatform('mac') <getplatform>` """
    return getplatform('mac', die=die)




[docs]
def isjupyter(detailed=False):
    """
    Check if a command is running inside a Jupyter notebook.

    Returns true/false if detailed=False, or a string for the exact type of notebook
    (e.g., Google Colab) if detailed=True.

    Args:
        detailed (bool): return a string of IPython/Jupyter type instead of true/false
        verbose (bool): print out additional information if IPython can't be imported

    **Examples**::

        if sc.isjupyter():
            sc.options(jupyter=True)

        if sc.isjupyter(detailed=True) == 'colab':
            print('You are running on Google Colab')

    *New in version 3.0.0.*
    """
    # First check if we can import it
    output = None
    is_jupyter = False
    try:
        from IPython import get_ipython
        ipython = get_ipython()
    except Exception as E: # pragma: no cover
        output = f'Python (Jupyter/IPython is not installed) ({E})'
        return output if detailed else is_jupyter

    # It's not IPython
    if ipython is None:
        output = 'Python (Jupyter/IPython is installed but not running)'
        return output if detailed else is_jupyter

    # It is IPython, keep checking
    else: # pragma: no cover

        # Get the modules and classes
        classes = inspect.getmro(ipython.__class__)
        names = [str(c).lower() for c in classes] # Names as a list
        firstname = names[0]
        allnames = ', '.join(names)

        # Define strings to look for
        mapping = dict(
            jupyter = 'zmqinteractive',
            ipython = 'terminalinteractive',
            spyder  = 'spyder',
            colab   = 'colab',
        )

        # First check if it's Jupyter, and return if it is
        if not detailed:
            if mapping['jupyter'] in allnames and mapping['spyder'] not in allnames: # Spyder inherits from ZMQ, unlike e.g. Colab shell
                is_jupyter = True
            return is_jupyter

        # Otherwise, get the full name
        else:
            if   mapping['spyder']  in firstname: output = 'Spyder'
            elif mapping['colab']   in firstname: output = 'Google Colab'
            elif mapping['ipython'] in firstname: output = 'IPython'
            elif mapping['jupyter'] in firstname: output = 'Jupyter'
            else:
                output = names[0]

            return output




[docs]
def asciify(string, form='NFKD', encoding='ascii', errors='ignore', **kwargs):
    """
    Convert an arbitrary Unicode string to ASCII.

    Args:
        form (str): the type of Unicode normalization to use
        encoding (str): the output string to encode to
        errors (str): how to handle errors
        kwargs (dict): passed to :meth:`string.decode()`

    **Example**::
        sc.asciify('föö→λ ∈ ℝ') # Returns 'foo  R'

    *New in version 2.0.1.*
    """
    normalized = unicodedata.normalize(form, string) # First, normalize Unicode encoding
    encoded = normalized.encode(encoding, errors) # Then, convert to ASCII
    decoded = encoded.decode(**kwargs) # Finally, decode back to utf-8
    return decoded


##############################################################################
#%% Web/HTML functions
##############################################################################

__all__ += ['urlopen', 'wget', 'download', 'htmlify']


[docs]
def urlopen(url, filename=None, save=None, headers=None, params=None, data=None,
            prefix='http', convert=True, die=False, response='text', verbose=False):
    """
    Download a single URL.

    Alias to ``urllib.request.urlopen(url).read()``. See also :func:`sc.download() <download>`
    for downloading multiple URLs. Note: :func:`sc.urlopen() <urlopen>`/:func:`sc.wget() <wget>` are aliases.

    Args:
        url (str): the URL to open, either as GET or POST
        filename (str): if supplied, save to file instead of returning output
        save (bool): if supplied instead of ``filename``, then use the default filename
        headers (dict): a dictionary of headers to pass
        params (dict): a dictionary of parameters to pass to the GET request
        data (dict) a dictionary of parameters to pass to a POST request
        prefix (str): the string to ensure the URL starts with (else, add it)
        convert (bool): whether to convert from bytes to string
        die (bool): whether to raise an exception if converting to text failed
        response (str): what to return: 'text' (default), 'json' (dictionary version of the data), 'status' (the HTTP status), or 'full' (the full response object)
        verbose (bool): whether to print progress

    **Examples**::

        html = sc.urlopen('wikipedia.org') # Retrieve into variable html
        sc.urlopen('http://wikipedia.org', filename='wikipedia.html') # Save to file wikipedia.html
        sc.urlopen('https://wikipedia.org', save=True, headers={'User-Agent':'Custom agent'}) # Save to the default filename (here, wikipedia.org), with headers
        sc.urlopen('wikipedia.org', response='status') # Only return the HTTP status of the site

    | *New in version 2.0.0:* renamed from ``wget`` to ``urlopen``; new arguments
    | *New in version 2.0.1:* creates folders by default if they do not exist
    | *New in version 2.0.4:* "prefix" argument, e.g. prepend "http://" if not present
    | *New in version 3.1.4:* renamed "return_response" to "response"; additional options
    """
    from urllib import request as ur # Need to import these directly, not via urllib
    from urllib import parse as up

    T = sc.timer()

    # Handle headers
    default_headers = {
        'User-Agent': 'Python/Sciris', # Some URLs require this
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', # Default Chrome/Safari header
    }
    headers = mergedicts(default_headers, headers)

    # Handle parameters and data
    full_url = url
    if prefix is not None:
        if not full_url.startswith(prefix):
            full_url = prefix + '://' + full_url # Leaves https alone, but adds http:// otherwise
    if params is not None: # pragma: no cover
        full_url = full_url + '?' + up.urlencode(params)
    if data is not None: # pragma: no cover
        data = up.urlencode(data).encode(encoding='utf-8', errors='ignore')

    if verbose: print(f'Downloading {url}...')
    request = ur.Request(full_url, headers=headers, data=data)
    resp = ur.urlopen(request) # Actually open the URL
    if response in ['text', 'json']:
        output = resp.read()
        if response == 'json':
            try:
                output = sc.loadjson(string=output)
            except Exception as E:
                errormsg = f'Could not convert HTTP response beginning "{output[:20]}" to JSON'
                raise ValueError(errormsg) from E
        elif convert:
            if verbose>1: print('Converting from bytes to text...')
            try:
                output = output.decode()
            except Exception as E: # pragma: no cover
                if die:
                    raise E
                elif verbose:
                    errormsg = f'Could not decode to text: {str(E)}'
                    print(errormsg)
    elif response == 'status':
        output = resp.status
    elif response == 'full':
        output = resp
    else:
        errormsg = f'Unrecognized option "{response}"; must be one of "text", "json", "status", or "full"'
        raise ValueError(errormsg)

    # Set filename -- from https://stackoverflow.com/questions/31804799/how-to-get-pdf-filename-with-python-requests
    if filename is None and save: # pragma: no cover
        headers = dict(resp.getheaders())
        string = "Content-Disposition"
        if string in headers.keys():
            filename = re.findall("filename=(.+)", headers[string])[0]
        else:
            filename = url.rstrip('/').split('/')[-1] # Remove trailing /, then pull out the last chunk

    if filename is not None and save is not False:
        if verbose: print(f'Saving to {filename}...')
        filename = sc.makefilepath(filename, makedirs=True)
        if isinstance(output, bytes): # Raw HTML data
            with open(filename, 'wb') as f: # pragma: no cover
                f.write(output)
        elif isinstance(output, dict): # If it's a JSON
            sc.savejson(filename, output)
        else: # Other text
            with open(filename, 'w', encoding='utf-8') as f: # Explicit encoding to avoid issues on Windows
                f.write(output)
        output = filename

    if verbose:
        T.toc(f'Time to download {url}')

    return output


# Alias for backwards compatibility
wget = urlopen



[docs]
def download(url, *args, filename=None, save=True, parallel=True, die=True, verbose=True, **kwargs):
    """
    Download one or more URLs in parallel and return output or save them to disk.

    A parallelized wrapper for :func:`sc.urlopen() <urlopen>`, except with ``save=True`` by default.

    Args:
        url (str/list/dict): either a single URL, a list of URLs, or a dict of key:URL or filename:URL pairs
        *args (list): additional URLs to download
        filename (str/list): either a string or a list of the same length as ``url`` (if not supplied, return output)
        save (bool): if supplied instead of ``filename``, then use the default filename
        parallel (bool): whether to download multiple URLs in parallel
        die (bool): whether to raise an exception if a URL can't be retrieved (default true)
        verbose (bool): whether to print progress (if verbose=2, print extra detail on each downloaded URL)
        **kwargs (dict): passed to :func:`sc.urlopen() <urlopen>`

    **Examples**::

        html = sc.download('http://sciris.org') # Download a single URL
        data = sc.download('http://sciris.org', 'http://covasim.org', save=False) # Download two in parallel
        sc.download({'sciris.html':'http://sciris.org', 'covasim.html':'http://covasim.org'}) # Download two and save to disk
        sc.download(['http://sciris.org', 'http://covasim.org'], filename=['sciris.html', 'covasim.html']) # Ditto
        data = sc.download(dict(sciris='http://sciris.org', covasim='http://covasim.org'), save=False) # Download and store in memory


    | *New in version 2.0.0.*
    | *New in version 3.0.0:* "die" argument
    | *New in version 3.1.1:* default order switched from URL:filename to filename:URL pairs
    | *New in version 3.1.3:* output as objdict instead of odict
    """
    T = sc.timer()

    # Parse arguments
    if isinstance(url, dict):
        keys = list(url.keys())
        urls = list(url.values())
        if any(['http' in key for key in keys]) and not any(['http' in url for url in urls]):
            keys,urls = urls,keys # Swap order, expect http to be in the URLs but not the keys
    else:
        keys = mergelists(filename)
        urls = mergelists(url, *args)

    # Ensure consistency
    n_urls = len(urls)
    n_keys = len(keys)
    if not n_keys:
        if save: # Filenames generated by urlopen(), pass None
            keys = [None]*n_urls
        else: # Otherwise, turn the URL into a filename
            keys = [url.split('/')[-1] for url in urls] # Turn e.g. 'http://mysite.com/tree/index.html' into 'index.html'
    elif n_keys != n_urls: # pragma: no cover
        errormsg = f'Cannot process {n_urls} URLs and {n_keys} filenames'
        raise ValueError(errormsg)

    if verbose and n_urls > 1:
        print(f'Downloading {n_urls} URLs...')

    # Get results in parallel
    wget_verbose = (verbose>1) or (verbose and n_urls == 1) # By default, don't print progress on each download
    iterkwargs = dict(url=urls, filename=keys)
    func_kwargs = mergedicts(dict(save=save, verbose=wget_verbose), kwargs)
    if n_urls > 1 and parallel:
        outputs = sc.parallelize(urlopen, iterkwargs=iterkwargs, kwargs=func_kwargs, parallelizer='thread', die=die)
    else:
        outputs = []
        for key,url in zip(keys, urls):
            try:
                output = urlopen(url=url, filename=key, **func_kwargs)
            except Exception as E: # pragma: no cover
                if die:
                    raise E
                else:
                    warnmsg = f'Could not download {url}: {E}'
                    warnings.warn(warnmsg, category=RuntimeWarning, stacklevel=2)
                    output = E

            outputs.append(output)

    # If we're returning the data rather than saving the files, convert to an odict
    if not save:
        outputs = sc.objdict({k:v for k,v in zip(keys, outputs)})

    if verbose and n_urls > 1:
        T.toc(f'Time to download {n_urls} URLs')
    if n_urls == 1:
        outputs = outputs[0]

    return outputs




[docs]
def htmlify(string, reverse=False, tostring=False):
    """
    Convert a string to its HTML representation by converting unicode characters,
    characters that need to be escaped, and newlines. If reverse=True, will convert
    HTML to string. If tostring=True, will convert the bytestring back to Unicode.

    **Examples**::

        output = sc.htmlify('foo&\\nbar') # Returns b'foo&amp;<br>bar'
        output = sc.htmlify('föö&\\nbar', tostring=True) # Returns 'f&#246;&#246;&amp;&nbsp;&nbsp;&nbsp;&nbsp;bar'
        output = sc.htmlify('foo&amp;<br>bar', reverse=True) # Returns 'foo&\\nbar'
    """
    import html
    if not reverse: # Convert to HTML
        output = html.escape(string).encode('ascii', 'xmlcharrefreplace') # Replace non-ASCII characters
        output = output.replace(b'\n', b'<br>') # Replace newlines with <br>
        output = output.replace(b'\t', b'&nbsp;&nbsp;&nbsp;&nbsp;') # Replace tabs with 4 spaces
        if tostring: # Convert from bytestring to unicode # pragma: no cover
            output = output.decode()
    else: # Convert from HTML
        output = html.unescape(string)
        output = output.replace('<br>','\n').replace('<br />','\n').replace('<BR>','\n')
    return output



##############################################################################
#%% Type functions
##############################################################################

__all__ += ['flexstr', 'sanitizestr', 'isiterable', 'checktype', 'isnumber', 'isstring', 'isarray', 'isfunc', 'ismodule',
            'toarray', 'tolist', 'promotetoarray', 'promotetolist', 'transposelist',
            'swapdict', 'mergedicts', 'mergelists', 'ifelse']


[docs]
def flexstr(arg, *args, force=True, join=''):
    """
    Try converting any object to a "regular" string (i.e. ``str``), but proceed
    if it fails. Note: this function calls ``repr()`` rather than ``str()`` to
    ensure a more robust representation of objects.

    Args:
        arg (any): the object to convert to a string
        args (list): additional arguments
        force (bool): whether to force it to be a string
        join (str): if multiple arguments are provided, the character to use to join

    **Example**::

        sc.flexstr(b'foo', 'bar', [1,2]) # Returns 'foobar[1, 2]'

    *New in version 3.0.0:* handle multiple inputs
    """
    arglist = mergelists(arg, list(args))
    outlist = []
    for arg in arglist:
        if isinstance(arg, str):
            output = arg
        elif isinstance(arg, bytes):
            try:
                output = arg.decode() # If it's bytes, decode to unicode
            except: # pragma: no cover
                if force: output = repr(arg) # If that fails, just print its representation
                else:     output = arg
        else: # pragma: no cover
            if force: output = repr(arg)
            else:     output = arg # Optionally don't do anything for non-strings
        outlist.append(output)

    if len(outlist) == 1:
        outstr = outlist[0]
    else: # pragma: no cover
        if force:
            outstr = join.join(outlist)
        else:
            outstr = outlist

    return outstr




[docs]
def sanitizestr(string=None, alphanumeric=False, nospaces=False, asciify=False,
                lower=False, validvariable=False, spacechar='_', symchar='?'):
    """
    Remove all non-"standard" characters from a string

    Can be used to e.g. generate a valid variable name from arbitrary input, remove
    non-ASCII characters (replacing with equivalent ASCII ones if possible), etc.

    Args:
        string (str): the string to sanitize
        alphanumeric (bool): allow only alphanumeric characters
        nospaces (bool): remove spaces
        asciify (bool): remove non-ASCII characters
        lower (bool): convert uppercase characters to lowercase
        validvariable (bool): convert to a valid Python variable name (similar to alphanumeric=True, nospaces=True; uses spacechar to substitute)
        spacechar (str): if nospaces is True, character to replace spaces with (can be blank)
        symchar (str): character to replace non-alphanumeric characters with (can be blank)

    **Examples**::

        string1 = 'This Is a String'
        sc.sanitizestr(string1, lower=True) # Returns 'this is a string'

        string2 = 'Lukáš wanted €500‽'
        sc.sanitizestr(string2, asciify=True, nospaces=True, symchar='*') # Returns 'Lukas_wanted_*500*'

        string3 = '"Ψ scattering", María said, "at ≤5 μm?"'
        sc.sanitizestr(string3, asciify=True, alphanumeric=True, nospaces=True, spacechar='') # Returns '??scattering??Mariasaid??at?5?m??'

        string4 = '4 path/names/to variable!'
        sc.sanitizestr(string4, validvariable=True, spacechar='') # Returns '_4pathnamestovariable'

    *New in version 3.0.0.*
    """
    string = flexstr(string)
    if asciify:
        newstr = ''
        for char in string:
            newchar = unicodedata.normalize('NFKD', char).encode('ascii', 'ignore').decode()
            if not len(newchar):
                newchar = symchar
            newstr += newchar
        string = newstr
    if nospaces:
        string = string.replace(' ', spacechar)
    if lower:
        string = string.lower()
    if alphanumeric:
        space = spacechar if nospaces else ' '
        string = re.sub(f'[^0-9a-zA-Z{space}]', symchar, string) # If not for the space string, could use /w
    if validvariable:
        string = re.sub(r'\W', spacechar, string) # /W matches an non-alphanumeric character; use a raw string to avoid warnings
        if not string or str.isdecimal(string[0]): # Don't allow leading decimals: here in case spacechar is None
            string = '_' + string
    return string




[docs]
def isiterable(obj, *args, exclude=None, minlen=None):
    """
    Determine whether or not the input is iterable, with optional types to exclude.

    Args:
        obj (any): object to check for iterability
        args (any): additional objects to check for iterability
        exclude (list): list of iterable objects to exclude (e.g., strings)
        minlen (int): if not None, check that an object has a defined length as well

    **Examples**::

        obj1 = [1,2,3]
        obj2 = 'abc'
        obj3 = set()

        sc.isiterable(obj1) # Returns True
        sc.isiterable(obj1, obj2, obj3, exclude=str, minlen=1) # returns [True, False, False]

    See also :func:`numpy.iterable()` for a simpler version.

    *New in version 3.0.0:* "exclude" and "minlen" args; support multiple arguments
    """

    # Handle arguments
    objlist = [obj]
    n_args = len(args)
    exclude = tuple(tolist(exclude))
    if n_args: # pragma: no cover
        objlist.extend(args)

    # Determine iterability
    output = []
    for obj in objlist:

        # Basic test of iterability
        tf = np.iterable(obj)

        # Additional checks
        if tf:

            # Explicitly exclude types
            if exclude and checktype(obj, exclude):
                tf = False

            # Check length
            if minlen is not None: # pragma: no cover
                try:
                    assert len(obj) >= minlen
                except:
                    tf = False

        output.append(tf)

    if not n_args:
        output = output[0]

    return output




[docs]
def checktype(obj=None, objtype=None, subtype=None, die=False):
    """
    A convenience function for checking instances. If objtype is a type,
    then this function works exactly like isinstance(). But, it can also
    be one of the following strings:

        - 'str', 'string': string or bytes object
        - 'num', 'number': any kind of number
        - 'arr', 'array': a Numpy array (equivalent to np.ndarray)
        - 'listlike': a list, tuple, or array
        - 'arraylike': a list, tuple, or array with numeric entries
        - 'none': a None object

    If subtype is not None, then checktype will iterate over the object and check
    recursively that each element matches the subtype.

    Args:
        obj     (any):         the object to check the type of
        objtype (str or type): the type to confirm the object belongs to
        subtype (str or type): optionally check the subtype if the object is iterable
        die     (bool):        whether or not to raise an exception if the object is the wrong type

    **Examples**::

        sc.checktype(rand(10), 'array', 'number') # Returns True
        sc.checktype(['a','b','c'], 'listlike') # Returns True
        sc.checktype(['a','b','c'], 'arraylike') # Returns False
        sc.checktype([{'a':3}], list, dict) # Returns True

    | *New in version 2.0.1:* ``pd.Series`` considered 'array-like'
    | *New in version 3.0.0:* allow list (in addition to tuple) of types; allow checking for NoneType
    | *New in version 3.1.3:* handle exceptions when casting to "arraylike"
    """

    # Handle "objtype" input
    if isinstance(objtype, str): # Ensure it's lowercase (e.g. 'None' → 'none')
        objtype = objtype.lower()
    if   objtype in ['none']:                  objinstance = type(None) # NoneType not available in Python <3.10
    elif objtype in ['str','string']:          objinstance = _stringtypes
    elif objtype in ['num', 'number']:         objinstance = _numtype
    elif objtype in ['bool', 'boolean']:       objinstance = _booltypes
    elif objtype in ['arr', 'array']:          objinstance = np.ndarray
    elif objtype in ['listlike', 'arraylike']: objinstance = (list, tuple, np.ndarray, pd.Series) # Anything suitable as a numerical array
    elif type(objtype) == type:                objinstance = objtype # Don't need to do anything
    elif isinstance(objtype, tuple):           objinstance = objtype # Ditto
    elif isinstance(objtype, list):            objinstance = tuple(objtype) # Convert from a list to a tuple # pragma: no cover
    elif objtype is None: # pragma: no cover
        errormsg = "No object type was supplied; did you mean to use objtype='none' instead?"
        raise ValueError(errormsg)
    else: # pragma: no cover
        errormsg = f'Could not understand what type you want to check: should be either a string or a type, not "{objtype}"'
        raise ValueError(errormsg)

    # Do first-round checking
    result = isinstance(obj, objinstance)

    # Do second round checking
    if result and objtype == 'arraylike': # Special case for handling arrays which may be multi-dimensional
        try:
            obj = toarray(obj).flatten() # Flatten all elements
        except: # If it can't be cast to an array, it's not array-like
            result = False
        if subtype is None: # Add additional check for numeric entries
            subtype = _numtype + _booltypes
    if isiterable(obj) and subtype is not None:
        for item in obj:
            result = result and checktype(item, subtype)

    # Decide what to do with the information thus gleaned
    if die: # Either raise an exception or do nothing if die is True
        if not result: # It's not an instance
            errormsg = f'Incorrect type: object is {type(obj)}, but {objtype} is required'
            raise TypeError(errormsg)
        else:
            return # It's fine, do nothing
    else: # Return the result of the comparison
        return result




[docs]
def isnumber(obj, isnan=None):
    """
    Determine whether or not the input is a number.

    Identical to isinstance(obj, numbers.Number) unless isnan is specified.

    Args:
        obj (any): the object to check if it's a number
        isnan (bool): an optional additional check to determine whether the number is/isn't NaN

    | *New in version 3.2.0:* use ``isinstance()`` directly
    """
    output = isinstance(obj, _numtype)
    if output and isnan is not None: # It is a number, so can check for nan # pragma: no cover
        output = (np.isnan(obj) == isnan) # See if they match
    return output




[docs]
def isstring(obj):
    """
    Determine whether or not the input is string-like (i.e., str or bytes).

    Equivalent to ``isinstance(obj, (str, bytes))``
    """
    return isinstance(obj, _stringtypes)




[docs]
def isarray(obj, dtype=None):
    """
    Check whether something is a Numpy array, and optionally check the dtype.

    Almost the same as ``isinstance(obj, np.ndarray)``.

    **Example**::

        sc.isarray(np.array([1,2,3]), dtype=float) # False, dtype is int

    | *New in version 1.0.0.*
    | *New in version 3.1.6:* explicit False return
    """
    if isinstance(obj, np.ndarray):
        if dtype is None:
            return True
        else:
            if obj.dtype == dtype: # pragma: no cover
                return True
            else:
                return False
    return False




[docs]
def isfunc(obj):
    """
    Quickly check if something is a function.

    This checks whether or not something is a method (``types.MethodType``)
    or a function (``types.FunctionType``), which is different to whether or not
    it is callable (e.g., classes are callable).

    Note: Python doesn't have a crystal-clear distinction between things that are
    and aren't functions, so there may be some edge cases with this function. For
    example, ``dict.fromkeys`` is a ``builtin_function_or_method``, which returns
    True. So is ``list().pop`` and ``list().remove``. But ``list.pop`` and ``list.remove``
    are different types, and return ``False``. The full list of types it checks against is:

        - types.FunctionType
        - types.MethodType
        - types.BuiltinFunctionType
        - types.BuiltinMethodType
        - types.LambdaType
        - functools.partial
        - staticmethod
        - classmethod

    **Example**::

        sc.isfunc(list) # Returns False
        callable(list) # Returns True


    | *New in version 3.2.0.*
    | *New in version 3.2.3:* also checks for partial and lambda functions
    """
    typelist = (
        types.FunctionType,
        types.MethodType,
        types.BuiltinFunctionType,
        types.BuiltinMethodType,
        types.LambdaType,
        functools.partial,
        staticmethod,
        classmethod,
    )
    return isinstance(obj, typelist)




[docs]
def ismodule(obj):
    """
    Determine whether or not the input is a module.

    Equivalent to ``isinstance(obj, (types.ModuleType))``
    """
    return isinstance(obj, (types.ModuleType))




[docs]
def toarray(x, keepnone=False, asobject=True, dtype=None, **kwargs):
    """
    Small function to ensure consistent format for things that should be arrays
    (note: :func:`sc.toarray() <toarray>` and :func:`sc.promotetoarray() <promotetoarray>` are identical).

    Very similar to :func:`numpy.array`, with the main difference being that :func:`sc.toarray(3) <toarray>`
    will return ``np.array([3])`` (i.e. a 1-d array that can be iterated over), while
    ``np.array(3)`` will return a 0-d array that can't be iterated over.

    Args:
        x (any): a number or list of numbers
        keepnone (bool): whether ``sc.toarray(None)`` should return ``np.array([])`` or ``np.array([None], dtype=object)``
        asobject (bool): whether to prefer to coerce arrays to object type rather than string
        kwargs (dict): passed to :func:`numpy.array()`

    **Examples**::

        sc.toarray(5) # Returns np.array([5])
        sc.toarray([3,5]) # Returns np.array([3,5])
        sc.toarray(None, skipnone=True) # Returns np.array([])
        sc.toarray([1, 'foo']) # Returns np.array([1, 'foo'], dtype=object)

    | *New in version 1.1.0:* replaced "skipnone" with "keepnone"; allowed passing kwargs to ``np.array()``.
    | *New in version 2.0.1:* added support for pandas Series and DataFrame
    | *New in version 3.1.0:* "asobject" argument; cast mixed-type arrays to object rather than string by default
    """
    # Handle None
    skipnone = kwargs.pop('skipnone', None)
    if skipnone is not None: # pragma: no cover
        keepnone = not(skipnone)
        warnmsg = 'sc.toarray() argument "skipnone" has been deprecated as of v1.1.0; use keepnone instead'
        warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)

    # Handle different inputs
    if isnumber(x) or (isinstance(x, np.ndarray) and not np.shape(x)): # e.g. 3 or np.array(3)
        x = [x]
    elif isinstance(x, (pd.DataFrame, pd.Series)):
        x = x.values
    elif x is None and not keepnone:
        x = []

    # Actually convert to an array
    output = np.array(x, dtype=dtype, **kwargs)
    if asobject and (dtype is None) and issubclass(output.dtype.type, np.str_):
        output = np.array(x, dtype=object, **kwargs) # Cast to object instead of string

    return output




[docs]
def tolist(obj=None, objtype=None, keepnone=False, coerce='default'):
    """
    Make sure object is always a list (note: :func:`sc.tolist() <tolist>`/:func:`sc.promotetolist() <promotetolist>` are identical).

    Used so functions can handle inputs like ``'a'``  or ``['a', 'b']``. In other
    words, if an argument can either be a single thing (e.g., a single dict key)
    or a list (e.g., a list of dict keys), this function can be used to do the
    conversion, so it's always safe to iterate over the output.

    While this usually wraps objects in a list rather than converts them to a list,
    the "coerce" argument can be used to change this behavior. Options are:

    - 'none' or None: do not coerce
    - 'default': coerce objects that were lists in Python 2 (range, map, dict_keys, dict_values, dict_items)
    - 'tuple': all the types in default, plus tuples
    - 'full': all the types in default, plus tuples and arrays

    Args:
        obj (anything): object to ensure is a list
        objtype (anything): optional type to check for each element; see :func:`sc.checktype() <checktype>` for details
        keepnone (bool): if ``keepnone`` is false, then ``None`` is converted to ``[]``; else, it's converted to ``[None]``
        coerce (str/tuple):  tuple of additional types to coerce to a list (as opposed to wrapping in a list)

    See also :func:`sc.mergelists() <mergelists>` to handle multiple input arguments.

    **Examples**::

        sc.tolist(5) # Returns [5]
        sc.tolist(np.array([3,5])) # Returns [np.array([3,5])] -- not [3,5]!
        sc.tolist(np.array([3,5]), coerce=np.ndarray) # Returns [3,5], since arrays are coerced to lists
        sc.tolist(None) # Returns []
        sc.tolist(range(3)) # Returns [0,1,2] since range is coerced by default
        sc.tolist(['a', 'b', 'c'], objtype='number') # Raises exception

        def myfunc(data, keys):
            keys = sc.tolist(keys)
            for key in keys:
                print(data[key])

        data = {'a':[1,2,3], 'b':[4,5,6]}
        myfunc(data, keys=['a', 'b']) # Works
        myfunc(data, keys='a') # Still works, equivalent to needing to supply keys=['a'] without tolist()

    | *New in version 1.1.0:* "coerce" argument
    | *New in version 1.2.2:* default coerce values
    | *New in version 2.0.2:* tuple coersion
    """
    # Handle coerce
    default_coerce = (range, map, type({}.keys()), type({}.values()), type({}.items()))
    if isinstance(coerce, str):
        if coerce == 'none':
            coerce = None
        elif coerce == 'default':
            coerce = default_coerce
        elif coerce == 'tuple': # pragma: no cover
            coerce = default_coerce + (tuple,)
        elif coerce == 'array': # pragma: no cover
            coerce = default_coerce + (np.ndarray,)
        elif coerce == 'full':
            coerce = default_coerce + (tuple, np.ndarray)
        else: # pragma: no cover
            errormsg = f'Option "{coerce}"; not recognized; must be "none", "default", "tuple", or "full"'

    if objtype is None: # Don't do type checking
        if isinstance(obj, list):
            output = obj # If it's already a list and we're not doing type checking, just return
        elif obj is None:
            if keepnone:
                output = [None] # Wrap in a list
            else:
                output = [] # Return an empty list, the "none" equivalent for a list
        else:
            if coerce is not None and isinstance(obj, coerce):
                output = list(obj) # Coerce to list
            else:
                output = [obj] # Main usage case -- listify it
    else: # Do type checking
        if checktype(obj=obj, objtype=objtype, die=False):
            output = [obj] # If the object is already of the right type, wrap it in a list
        else:
            try:
                if not isiterable(obj): # Ensure it's iterable -- a mini promote-to-list
                    iterable_obj = [obj]
                else:
                    iterable_obj = obj
                for item in iterable_obj:
                    checktype(obj=item, objtype=objtype, die=True)
                output = list(iterable_obj) # If all type checking passes, cast to list instead of wrapping
            except TypeError as E:
                errormsg = f'tolist(): type mismatch, expecting type {objtype}'
                raise TypeError(errormsg) from E
    return output



# Aliases for core functions
promotetoarray = toarray
promotetolist = tolist



[docs]
def transposelist(obj, fix_uneven=True):
    """
    Convert e.g. a list of key-value tuples into a list of keys and a list of values.

    Args:
        obj (list): the list-of-lists to be transposed
        fix_uneven (bool): append None values where needed so all input lists have the same length

    **Examples**::

        o = sc.odict(a=1, b=4, c=9, d=16)
        itemlist = o.enumitems()
        inds, keys, vals = sc.transposelist(itemlist)

        listoflists = [
            ['a', 1, 3],
            ['b', 4, 5],
            ['c', 7, 8, 9, 10]
        ]
        trans = sc.transposelist(listoflists, fix_uneven=True)

    *New in version 1.1.0.*
    """
    if fix_uneven:
        maxlen = max([len(ls) for ls in obj])
        newobj = [] # Create a manual copy
        for ls in obj:
            row = [item for item in ls] + [None]*(maxlen - len(ls))
            newobj.append(row)
        obj = newobj

    return list(map(list, zip(*obj)))





[docs]
def swapdict(d):
    """
    Swap the keys and values of a dictionary. Equivalent to {v:k for k,v in d.items()}

    Args:
        d (dict): dictionary

    **Example**::
        d1 = {'a':'foo', 'b':'bar'}
        d2 = sc.swapdict(d1) # Returns {'foo':'a', 'bar':'b'}

    *New in version 1.3.0.*
    """
    if not isinstance(d, dict):
        errormsg = f'Not a dictionary: {type(d)}'
        raise TypeError(errormsg)
    try:
        output = {v:k for k,v in d.items()}
    except Exception as E:
        exc = type(E)
        errormsg = 'Could not swap keys and values: ensure all values are of hashable type'
        raise exc(errormsg) from E
    return output




[docs]
def mergedicts(*args, _strict=False, _overwrite=True, _copy=False, _sameclass=True, _die=True, **kwargs):
    """
    Small function to merge multiple dicts together.

    By default, skips any input arguments that are ``None``, and allows keys to be set
    multiple times. This function is similar to dict.update(), except it returns a value.
    The first dictionary supplied will be used for the output type (e.g. if the
    first dictionary is an odict, an odict will be returned).

    Note that arguments start with underscores to avoid possible collisions with
    keywords (e.g. :func:`sc.mergedicts(dict(loose=True, strict=True), strict=False, _strict=True) <mergedicts>`).

    **Note**: This function is similar to the "|" operator introduced in Python 3.9.
    However, ``sc.mergedicts()`` is useful for cases such as function arguments, where
    the default option is ``None`` but you will need a dict later on.

    Args:
        _strict    (bool): if True, raise an exception if an argument isn't a dict
        _overwrite (bool): if False, raise an exception if multiple keys are found
        _copy      (bool): whether or not to deepcopy the merged dictionary
        _sameclass (bool): whether to ensure the output has the same type as the first dictionary merged
        _die       (bool): whether to raise an exception if something goes wrong
        *args      (list): the sequence of dicts to be merged
        **kwargs   (dict): merge these into the dict as well

    **Examples**::

        d0 = sc.mergedicts(user_args) # Useful if user_args might be None, but d0 is always a dict
        d1 = sc.mergedicts({'a':1}, {'b':2}) # Returns {'a':1, 'b':2}
        d2 = sc.mergedicts({'a':1, 'b':2}, {'b':3, 'c':4}, None) # Returns {'a':1, 'b':3, 'c':4}
        d3 = sc.mergedicts(sc.odict({'b':3, 'c':4}), {'a':1, 'b':2}) # Returns sc.odict({'b':2, 'c':4, 'a':1})
        d4 = sc.mergedicts({'b':3, 'c':4}, {'a':1, 'b':2}, _overwrite=False) # Raises exception

    | *New in version 1.1.0:* "copy" argument
    | *New in version 1.3.3:* keywords allowed
    | *New in version 2.0.0:* keywords fully enabled; "_sameclass" argument
    | *New in version 2.0.1:* fixed bug with "_copy" argument
    """
    # Warn about deprecated keys
    renamed = ['strict', 'overwrite', 'copy']
    if any([k in kwargs for k in renamed]): # pragma: no cover
        warnmsg = f'sc.mergedicts() arguments "{strjoin(renamed)}" have been renamed with underscores as of v1.3.3; using these as keywords is undesirable'
        warnings.warn(warnmsg, category=FutureWarning, stacklevel=2)

    # Try to get the output type from the arguments, but revert to a standard dict if that fails
    outputdict = {}
    if _sameclass:
        for arg in args:
            if isinstance(arg, dict):
                try:
                    outputdict = arg.__class__() # This creates a new instance of the class
                    break
                except Exception as E: # pragma: no cover
                    errormsg = f'Could not create new dict of {type(args[0])} from first argument ({str(E)}); set _sameclass=False if this is OK'
                    if _die:
                        raise TypeError(errormsg) from E
                    else:
                        warnings.warn(errormsg, category=UserWarning, stacklevel=2)

    # Merge over the dictionaries in order
    args = list(args)
    args.append(kwargs) # Include any kwargs as the final dict
    for a,arg in enumerate(args):
        is_dict = isinstance(arg, dict)
        if _strict and not is_dict:
            errormsg = f'Argument {a} has {type(arg)}; must be dict since _strict=True'
            raise TypeError(errormsg)
        if is_dict:
            if not _overwrite:
                intersection = set(outputdict.keys()).intersection(arg.keys())
                if len(intersection):
                    errormsg = f'Could not merge dicts since keys "{strjoin(intersection)}" overlap and _overwrite=False'
                    raise KeyError(errormsg)
            outputdict.update(arg)
        else:
            if arg is not None and _die: # pragma: no cover
                errormsg = f'Could not handle argument {a} of {type(arg)}: expecting dict or None'
                raise TypeError(errormsg)

    if _copy:
        outputdict = dcp(outputdict, die=_die)
    return outputdict




[docs]
def mergelists(*args, coerce='default', copy=False, **kwargs):
    """
    Merge multiple lists together.

    Often used to flexible handle the input arguments to functions; see example
    below.

    Args:
        args (any): the lists, or items, to be joined together into a list
        coerce (str): what types of objects to treat as lists; see :func:`sc.tolist() <tolist>` for details
        copy (bool): whether to deepcopy the resultant object
        kwargs (dict): passed to :func:`sc.tolist() <tolist>`, which is called on each argument

    **Examples**::

        # Simple usage
        sc.mergelists(None)                                # Returns []
        sc.mergelists([1,2,3], [4,5,6])                    # Returns [1, 2, 3, 4, 5, 6]
        sc.mergelists([1,2,3], 4, 5, 6)                    # Returns [1, 2, 3, 4, 5, 6]
        sc.mergelists([(1,2), (3,4)], (5,6))               # Returns [(1, 2), (3, 4), (5, 6)]
        sc.mergelists((1,2), (3,4), (5,6))                 # Returns [(1, 2), (3, 4), (5, 6)]
        sc.mergelists((1,2), (3,4), (5,6), coerce='tuple') # Returns [1, 2, 3, 4, 5, 6]

        # Usage for handling flexible input arguments
        def my_func(arg=None, *args):
            arglist = sc.mergelists(arg, list(args))
            return arglist

        a = my_func()           # Returns []
        b = my_func([1,2,3])    # Returns [1,2,3]
        c = my_func(1,2,3)      # Returns [1,2,3]
        d = my_func([1,2], 3)   # Returns [1,2,3]
        f = my_func(1, *[2,3])  # Returns [1,2,3]
        e = my_func(1, [2,3])   # Returns [1,[2,3]] since second argument is ambiguous
        g = my_func([[1,2]], 3) # Returns [[1,2],3] since first argument is nested

    *New in version 1.1.0.*
    """
    obj = []
    for arg in args:
        arg = tolist(arg, coerce=coerce, **kwargs)
        obj.extend(arg)
    if copy:
        obj = dcp(obj)
    return obj




[docs]
def ifelse(*args, default=None, check=None):
    """
    For a list of inputs, return the first one that meets the condition

    By default, returns the first non-None item, but can also check truth value
    or an arbitrary function.

    Args:
        args (any): the arguments to check (note: cannot supply a single list, use * to unpack)
        default (any): the default value to use if no arguments meet the condition
        check (func): must be None (check if arguments are not None), bool (check if arguments evaluate True), or a callable (which returns True/False)

    Equivalent to ``next((arg for arg in args if check(arg)), default)``

    **Examples**::

        # 1. Standard usage
        a = None
        b = 3
        out = sc.ifelse(a, b)

        ## Equivalent to:
        out = a if a is not None else b

        # 2. Boolean usage
        args = ['', False, {}, 'ok']
        out = sc.ifelse(*args, check=bool)

        ## Equivalent to:
        out = next((arg for arg in args if arg), None)

        # 3. Custom function
        args = [1, 3, 5, 7]
        out = sc.ifelse(*args, check=lambda x: x>5)

        ## Equivalent to:
        out = None
        for arg in args:
          if arg > 5:
            out = val
            break

    | *New in version 3.1.5.*
    """
    # Handle check
    if check is None:
        check = lambda x: x is not None
    elif check in [bool, True]:
        check = bool
    elif not callable(check): # pragma: no cover
        errormsg = f'"check" must be None, bool, or callable, not "{check}"'
        raise ValueError(errormsg)

    # Actually calculate it
    out = next((arg for arg in args if check(arg)), default)
    return out



def _sanitize_iterables(obj, *args):
    """
    Take input as a list, array, pandas Series, or non-iterable type, along with
    one or more arguments, and return a list, along with information on what the
    input types were (is_list and is_array).

    Not intended for the user, but used internally.

    **Examples**::

        _sanitize_iterables(1, 2, 3)             # Returns [1,2,3], False, False
        _sanitize_iterables([1, 2], 3)           # Returns [1,2,3], True, False
        _sanitize_iterables(np.array([1, 2]), 3) # Returns [1,2,3], True, True
        _sanitize_iterables(np.array([1, 2, 3])) # Returns [1,2,3], False, True
    """
    is_list   = isinstance(obj, list) or len(args)>0 # If we're given a list of args, treat it like a list
    is_array  = isinstance(obj, (np.ndarray, pd.Series)) # Check if it's an array
    if is_array: # If it is, convert it to a list
        obj = obj.tolist()
    objs = dcp(tolist(obj)) # Ensure it's a list, and deepcopy to avoid mutability
    objs.extend(args) # Add on any arguments
    return objs, is_list, is_array


def _sanitize_output(obj, is_list, is_array, dtype=None):
    """
    The companion to _sanitize_iterables, convert the object back to the original
    type supplied. Not for the user.
    """
    if is_array:
        output = np.array(obj, dtype=dtype)
    elif not is_list and len(obj) == 1:
        output = obj[0]
    else:
        output = obj
    return output


##############################################################################
#%% Misc. functions
##############################################################################

__all__ += ['strjoin', 'newlinejoin', 'strsplit', 'runcommand', 'uniquename',
            'suggest', 'importbyname', 'importbypath']



[docs]
def strjoin(*args, sep=', '):
    """
    Like string ``join()``, but handles more flexible inputs, converts items to
    strings. By default, join with commas.

    Args:
        args (list): the list of items to join
        sep (str): the separator string

    **Example**::

        sc.strjoin([1,2,3], 4, 'five')

    *New in version 1.1.0.*
    """
    obj = []
    for arg in args:
        if isstring(arg):
            obj.append(arg)
        elif isiterable(arg):
            obj.extend([str(item) for item in arg])
        else: # pragma: no cover
            obj.append(str(arg))
    output = sep.join(obj)
    return output




[docs]
def newlinejoin(*args):
    """
    Alias to ``strjoin(*args, sep='\\n')``.

    **Example**::

        sc.newlinejoin([1,2,3], 4, 'five')

    *New in version 1.1.0.*
    """
    return strjoin(*args, sep='\n')




[docs]
def strsplit(string, sep=None, skipempty=True, lstrip=True, rstrip=True):
    """
    Convenience function to split common types of strings.

    Note: to use regular expressions, use :func:`re.split()` instead.

    Args:
        string    (str):      the string to split
        sep       (str/list): the types of separator to accept (default space or comma, i.e. [' ', ','])
        skipempty (bool):     whether to skip empty entries (i.e. from consecutive delimiters)
        lstrip    (bool):     whether to strip any extra spaces on the left
        rstrip    (bool):     whether to strip any extra spaces on the right

    Examples:

        sc.strsplit('a b c') # Returns ['a', 'b', 'c']
        sc.strsplit('a,b,c') # Returns ['a', 'b', 'c']
        sc.strsplit('a, b, c') # Returns ['a', 'b', 'c']
        sc.strsplit('  foo_bar  ', sep='_') # Returns ['foo', 'bar']

    *New in version 2.0.0.*
    """
    strlist = []
    if sep is None:
        sep = [' ', ',']

    # Generate a character sequence that isn't in the string
    special = '∙' # Pick an obscure Unicode character
    while special in string: # If it exists in the string nonetheless, keep going until it doesn't # pragma: no cover
        special += special

    # Convert all separators to the special character
    for s in sep:
        string = string.replace(s, special)

    # Split the string, filter, and trim
    strlist = string.split(special)
    if lstrip:
        strlist = [s.lstrip() for s in strlist]
    if rstrip:
        strlist = [s.rstrip() for s in strlist]
    if skipempty:
        strlist = [s for s in strlist if s != '']

    return strlist




[docs]
def runcommand(command, printinput=False, printoutput=None, wait=True, **kwargs):
    """
    Make it easier to run shell commands.

    Alias to :obj:`subprocess.Popen()`. Returns captured output if wait=True, else
    returns the subprocess.

    Args:
        command (str): the command to run
        printinput (bool): whether to print the input string
        printoutput (bool): whether to print the output (default: False if wait=True, True if wait=False)
        wait (bool): whether to wait for the process to return (else, return immediately with the subprocess)

    **Examples**::

        myfiles = sc.runcommand('ls').split('\\n') # Get a list of files in the current folder
        sc.runcommand('sshpass -f %s scp myfile.txt me@myserver:myfile.txt' % 'pa55w0rd', printinput=True, printoutput=True) # Copy a file remotely
        sc.runcommand('sleep 600; mkdir foo', wait=False) # Waits 10 min, then creates the folder "foo", but the function returns immediately
        sc.runcommand('find', wait=False) # Equivalent to executing 'find' in a terminal

    *New in version 3.1.1:* print real-time output if ``wait=False``
    """
    # Handle defaults and inputs
    if wait:
        defaults = dict(shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # Redirect both to the pipe
    else:
        defaults = dict(shell=True, bufsize=0)
    if printoutput is None:
        printoutput = False if wait else True
    kwargs = mergedicts(defaults, kwargs)
    if printinput:
        print(command)

    # Actually run the command
    try:
        p = subprocess.Popen(command, **kwargs)
        if wait: # Run in foreground, blocking
            output, _ = p.communicate() # ...and then retrieve stdout from the pipe
            try: # Try to decode
                output = output.decode('utf-8')
            except Exception as E: # If something goes wrong, just leave it # pragma: no cover
                warnmsg = f'Could not decode bytestring: {E}'
                warnings.warn(warnmsg, category=RuntimeWarning, stacklevel=2)
        else: # Run in background
            if printoutput: # ...but print output as it comes if asked
                while p.returncode is None:
                    stdout, stderr = p.communicate()
                    if stdout: print(stdout) # Usually None, but just in case
                    if stderr: print(stderr)
            output = p # Return the subprocess instead of the output
    except Exception as E: # pragma: no cover
        output = f'runcommand(): shell command failed: {str(E)}' # This is for a Python error, not a shell error -- those get passed to output

    if printoutput and wait:
        print(output)

    return output




[docs]
def uniquename(name=None, namelist=None, style=None, human=False, suffix=None):
    """
    Given a name and a list of other names, add a counter to the name so that
    it doesn't conflict with the other names.

    Useful for auto-incrementing filenames, etc.

    Args:
        name (str): the string to ensure is unique
        namelist (list): the list of strings that are taken
        style (str): a custom style for appending the counter that takes a single argument for the integer repeat; default '%d'
        human (bool): if True, use ' (%d)' as the style instead of '%d'
        suffix (str): if provided, remove this suffix from each name and add it back to the unique name

    **Examples**::

        sc.uniquename('out', ['out', 'out1']) # Returns 'out2'
        sc.uniquename(name='file', namelist=['file', 'file (1)', 'file (2)', 'myfile'], human=True) # Returns 'file (3)'
        sc.uniquename('results.csv', ['results.csv', 'results1.csv'], suffix='.csv') # Returns 'results2.csv'

    | *New in version 3.2.0:* "human" and "suffix" arguments, simpler default style
    """
    # Set the style if not specified
    if style is None:
        if human:
            style = ' (%d)'
        else:
            style = '%d'

    # Prepare the inputs
    name = str(name) # Ensure it's a string
    namelist = tolist(namelist)
    if suffix:
        name = name.removesuffix(suffix)
        namelist = [n.removesuffix(suffix) for n in namelist]

    # Do the loop
    i = 0
    unique_name = name # Start with the passed in name
    while unique_name in namelist: # Try adding an index (i) to the name until we find one that's unique
        i += 1
        unique_name = name + style%i # Convert from  the original name
    if suffix:
        unique_name += suffix
    return unique_name # Return the found name.




[docs]
def suggest(user_input, valid_inputs, n=1, threshold=None, fulloutput=False, die=False, which='damerau'):
    """
    Return suggested item

    Returns item with lowest Levenshtein distance, where case substitution and stripping
    whitespace are not included in the distance. If there are ties, then the additional operations
    will be included.

    Args:
        user_input (str): User's input
        valid_inputs (list): List/collection of valid strings
        n (int): Maximum number of suggestions to return
        threshold (int): Maximum number of edits required for an option to be suggested (by default, two-thirds the length of the input; for no threshold, set to -1)
        fulloutput (bool): Whether to return suggestions and distances.
        die (bool): If True, an informative error will be raised (to avoid having to implement this in the calling code)
        which (str): Distance calculation method used; options are "damerau" (default), "levenshtein", or "jaro"

    Returns:
        suggestions (str or list): Suggested string. Returns None if no suggestions with edit distance less than threshold were found. This helps to make
             suggestions more relevant.

    **Examples**::

        >>> sc.suggest('foo', ['Foo','Bar'])
        'Foo'
        >>> sc.suggest('foo', ['FOO','Foo'])
        'Foo'
        >>> sc.suggest('foo', ['Foo ','boo'])
        'Foo '
    """
    try:
        import jellyfish # To allow as an optional import
    except ModuleNotFoundError as e: # pragma: no cover
        raise ModuleNotFoundError('The "jellyfish" Python package is not available; please install via "pip install jellyfish"') from e

    valid_inputs = tolist(valid_inputs, objtype='string')

    # Handle version incompatibility
    try: # jellyfish >= 1.0
        jaro = jellyfish.jaro_similarity
    except:
        jaro = jellyfish.jaro_distance

    mapping = {
        'damerau':     jellyfish.damerau_levenshtein_distance,
        'levenshtein': jellyfish.levenshtein_distance,
        'jaro':        jaro,
    }

    keys = list(mapping.keys())
    if which not in keys: # pragma: no cover
        errormsg = f'Method {which} not available; options are {strjoin(keys)}'
        raise NotImplementedError(errormsg)

    dist_func = mapping[which]

    distance = np.zeros(len(valid_inputs))
    cs_distance = np.zeros(len(valid_inputs))
    # We will switch inputs to lowercase because we want to consider case substitution a 'free' operation
    # Similarly, stripping whitespace is a free operation. This ensures that something like
    # 'foo ' will match 'Foo' ahead of 'boo '
    for i, s in enumerate(valid_inputs):
        distance[i]    = dist_func(user_input, s.strip().lower())
        cs_distance[i] = dist_func(user_input, s.strip())

    # If there is a tie for the minimum distance, use the case sensitive comparison
    if sum(distance==min(distance)) > 1:
        distance = cs_distance

    # Order by distance, then pull out the right inputs, then turn them into a list
    order = np.argsort(distance)
    suggestions = [valid_inputs[i] for i in order]
    suggestionstr = strjoin([f'"{sugg}"' for sugg in suggestions[:n]])

    # Handle threshold
    if threshold is None:
        threshold = np.ceil(len(user_input)*2/3)
    if threshold < 0: # pragma: no cover
        threshold = np.inf

    # Output
    if min(distance) > threshold:
        if die: # pragma: no cover
            errormsg = f'"{user_input}" not found'
            raise ValueError(errormsg)
        else:
            return
    elif die:
        errormsg = f'"{user_input} not found - did you mean {suggestionstr}'
        raise ValueError(errormsg)
    else:
        if fulloutput:
            output = dict(zip(suggestions, distance[order]))
            return output
        else:
            if n==1:
                return suggestions[0]
            else:
                return suggestions[:n]



def _assign_to_namespace(var, obj, namespace=None, overwrite=True): # pragma: no cover
    """ Helper function to assign an object to the global namespace """
    if namespace is None:
        namespace = globals()
    if var in namespace and not overwrite:
        errormsg = f'Cannot assign to variable "{var}" since it already exists and overwrite=False'
        raise NameError(errormsg)
    namespace[var] = obj
    return



[docs]
def importbyname(module=None, variable=None, path=None, namespace=None, lazy=False, overwrite=True, die=True, verbose=True, **kwargs):
    """
    Import modules by name.

    ``sc.importbyname(x='y')`` is equivalent to "import y as x", but allows module
    importing to be done programmatically.

    See https://peps.python.org/pep-0690/ for a proposal for incorporating something
    similar into Python by default.

    Args:
        module (str): name of the module to import
        variable (str): the name of the variable to assign the module to (by default, the module's name)
        path (str/path): optionally load from path instead of by name
        namespace (dict): the namespace to load the modules into (by default, globals)
        lazy (bool): whether to create a LazyModule object instead of load the actual module
        overwrite (bool): whether to allow overwriting an existing variable (by default, yes)
        die (bool): whether to raise an exception if encountered
        verbose (bool): whether to print a warning if an module can't be imported
        **kwargs (dict): additional variable:modules pairs to import (see examples below)

    **Examples**::

        np = sc.importbyname('numpy') # Standard usage
        sc.importbyname(pd='pandas', np='numpy') # Use dictionary syntax to assign to namespace
        plt = sc.importbyname(plt='matplotlib.pyplot', lazy=True) # Won't actually import until e.g. plt.figure() is called
        mymod = sc.importbyname(path='/path/to/mymod') # Import by path rather than name

    See also :func:`sc.importbypath() <importbypath>`.

    | *New in version 2.1.0:* "verbose" argument
    | *New in version 3.0.0:* "path" argument
    """
    # Initialize
    if variable is None:
        variable = module

    # Map modules to variables
    mapping = {}
    if module is not None or path is not None:
        mapping[variable] = (path or module) # In this order so path takes precedence
    mapping.update(kwargs)

    # Load modules
    libs = []
    for variable,module in mapping.items():
        if lazy:
            lib = LazyModule(module=module, variable=variable, namespace=namespace)
        else:
            try:
                if path is not None:
                    lib = importbypath(path, variable)
                else:
                    lib = importlib.import_module(module)
            except Exception as E: # pragma: no cover
                errormsg = f'Cannot import "{module}" since {module} is not installed. Please install {module} and try again.'
                if verbose and not die:
                    print(errormsg)
                lib = None
                if die: raise E
                else:   return False

        _assign_to_namespace(var=variable, obj=lib, namespace=namespace, overwrite=overwrite)
        if namespace:
            namespace[variable] = lib
        libs.append(lib)

    if len(libs) == 1:
        libs = libs[0]

    return libs




[docs]
def importbypath(path, name=None, overwrite=False):
    """
    Import a module by path.

    Useful for importing multiple versions of the same module for comparison purposes.

    Args:
        path (str/path): the path to load the module from (with or without __init__.py)
        name (str): the name of the loaded module (by default, the file or folder name from the path)
        overwrite (bool): if True, load the module by the original name even if it exists (otherwise, generate a unique name, e.g. module1)

    **Examples**::

        # Load a module that isn't importable otherwise
        mymod = sc.importbypath('my file with spaces.py')

        # Load two versions of the same module
        old = sc.importbypath('/path/to/old/mylib')
        new = sc.importbypath('/path/to/new/mylib')
        assert new.__version__ > old.__version__ # Example version comparison (see also sc.compareverisons())

    See also :func:`sc.importbyname() <importbyname>`.

    | *New in version 3.0.0.*
    | *New in version 3.2.0:* Allow importing two modules that have self imports (e.g. "import mylib" from within mylib)
    | *New in version 3.2.1:* "overwrite" argument
    """
    # Sanitize the path and filename
    default_file='__init__.py'
    filepath = Path(path)
    parts = list(filepath.parts)
    if filepath.is_dir():
        filepath = filepath / default_file # Append default filename
    elif not filepath.is_file():
        errormsg = f'Could not import {filepath}: is not a valid file or folder'
        raise FileNotFoundError(errormsg)

    # Get the original name (what Python would know it as) from either the filename or the folder name
    if parts[-1] == default_file:
        basename = parts[-2] # If __init__.py was included, strip it now
    else:
        basename = parts[-1].removesuffix('.py')
    orig_name = sanitizestr(basename, validvariable=True) # Ensure it's a valid name
    if name is None:
        name = orig_name
        if name in sys.modules and not overwrite: # If a name isn't provided, ensure it doesn't conflict with an existing module name
            name = uniquename(name, sys.modules.keys())
    renamed = name != orig_name # Flag to track if we've renamed the module

    # If the original name is already present, make a copy to restore later
    if orig_name in sys.modules:
        orig_module = sys.modules[orig_name]
        restore = True
    else:
        restore = False

    # Actually import the module -- from https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
    spec = importlib.util.spec_from_file_location(name, filepath)
    module = importlib.util.module_from_spec(spec)

    # We need to call it by both orig_name and name to handle self-referential imports
    sys.modules[name] = module
    if renamed:
        sys.modules[orig_name] = module

    # Now actually "execute" the module, i.e. load it into memory
    spec.loader.exec_module(module)

    # Finally, clean up the original module name
    if restore and not overwrite:
        sys.modules[orig_name] = orig_module
    elif renamed:
        del sys.modules[orig_name]

    return module




##############################################################################
#%% Classes
##############################################################################

__all__ += ['KeyNotFoundError', 'LinkException', 'autolist', 'Link', 'LazyModule', 'tryexcept']



[docs]
class KeyNotFoundError(KeyError):
    """
    A tiny class to fix repr for KeyErrors. KeyError prints the repr of the error
    message, rather than the actual message, so e.g. newline characters print as
    the character rather than the actual newline.

    **Example**::

        raise sc.KeyNotFoundError('The key "foo" is not available, but these are: "bar", "cat"')
    """

    def __str__(self): # pragma: no cover
        return Exception.__str__(self)




[docs]
class LinkException(Exception): # pragma: no cover
    """
    An exception to raise when links are broken, for exclusive use with the Link
    class.
    """
    pass




[docs]
class autolist(list):
    """
    A simple extension to a list that defines add methods to simplify appending
    and extension.

    **Examples**::

        ls = sc.autolist(3) # Quickly convert a scalar to a list

        ls = sc.autolist()
        for i in range(5):
            ls += i # No need for ls += [i]
    """
    def __init__(self, *args):
        arglist = mergelists(*args) # Convert non-iterables to iterables
        list.__init__(self, arglist)
        return


[docs]
    def __add__(self, obj=None):
        """ Allows non-lists to be concatenated """
        obj = tolist(obj)
        new = self.__class__(list.__add__(self, obj)) # Ensure it returns an autolist
        return new


    def __iadd__(self, obj):
        """ Allows += to work correctly -- key feature of autolist """
        obj = tolist(obj)
        self.extend(obj)
        return self

    def __getitem__(self, key):
        try:
            return list.__getitem__(self, key)
        except IndexError: # pragma: no cover
            errormsg = f'list index {key} is out of range for list of length {len(self)}'
            raise IndexError(errormsg) from None # Don't show the traceback




[docs]
class Link:
    """
    A class to differentiate between an object and a link to an object. The idea
    is that this object is parsed differently from other objects -- most notably,
    a recursive method (such as a pickle) would skip over Link objects, and then
    would fix them up after the other objects had been reinstated.

    Version: 2017jan31
    """

    def __init__(self, obj=None):
        self.obj = obj # Store the object -- or rather a reference to it, if it's mutable
        try:    self.uid = obj.uid # If the object has a UID, store it separately
        except: self.uid = None # If not, just use None

    def __repr__(self): # pragma: no cover
        """ Just use default """
        output  = sc.prepr(self)
        return output


[docs]
    def __call__(self, obj=None):
        """ If called with no argument, return the stored object; if called with argument, update object """
        if obj is None:
            if type(self.obj)==LinkException: # If the link is broken, raise it now
                raise self.obj
            return self.obj
        else: # pragma: no cover
            self.__init__(obj)
            return


    def __copy__(self, *args, **kwargs):
        """ Do NOT automatically copy link objects!! """
        return Link(LinkException('Link object copied but not yet repaired'))

    def __deepcopy__(self, *args, **kwargs):
        """ Same as copy """
        return self.__copy__(*args, **kwargs)




[docs]
class LazyModule:
    """
    Create a "lazy" module that is loaded if and only if an attribute is called.

    Typically not for use by the user, but is used by :func:`sc.importbyname() <importbyname>`.

    Args:
        module (str): name of the module to (not) load
        variable (str): variable name to assign the module to
        namespace (dict): the namespace to use (if not supplied, globals())
        overwrite (bool): whether to allow overwriting an existing variable (by default, yes)

    **Example**::

        pd = sc.LazyModule('pandas', 'pd') # pd is a LazyModule, not actually pandas
        df = pd.DataFrame() # Not only does this work, but pd is now actually pandas

    *New in version 2.0.0.*
    """

    def __init__(self, module, variable, namespace=None, overwrite=True):
        self._variable  = variable
        self._module    = module
        self._namespace = namespace
        self._overwrite = overwrite
        return


    def __repr__(self):
        output = f"<sc.LazyModule({self._variable}='{self._module}') at {hex(id(self))}>"
        return output


    def __getattr__(self, attr):
        """ In most cases, when an attribute is retrieved we want to replace this module with the actual one """
        _builtin_keys = ['_variable', '_module', '_namespace', '_overwrite', '_load']
        if attr in _builtin_keys: # pragma: no cover
            obj = object.__getattribute__(self, attr)
        else:
            obj = self._load(attr)
        return obj


    def _load(self, attr=None):
        """ Stop being lazy and load the module """
        var = self._variable
        lib = importlib.import_module(self._module)
        _assign_to_namespace(var, lib, namespace=self._namespace, overwrite=self._overwrite)
        if attr:
            obj = getattr(lib, attr)
        else: # pragma: no cover
            obj = lib
        return obj




[docs]
class tryexcept(cl.suppress):
    """
    Simple class to catch exceptions in a single line

    Effectively an alias to :obj:`contextlib.suppress()`, which itself is a programmatic
    equivalent to using try-except blocks.

    By default, all errors are caught. If ``catch`` is not None, then by default
    raise all other exceptions; if ``die`` is an exception (list of exceptions),
    then by default suppress all other exceptions.

    Due to Python's fundamental architecture, exceptions can only be caught inside
    a with statement, and the with block will exit immediately as soon as the first
    exception is encountered.

    Args:
        message (str): a custom message to print; "<EXCEPTION>" will be replaced in the message with the text of the exception
        die (bool/exception): default behavior of whether to raise caught exceptions; or, exceptions to die on (others to catch)
        catch (exception): one or more exceptions to catch regardless of "die"
        verbose (bool): whether to print caught exceptions (0 = silent, 1 = error type, 2 = full error information)
        history (list/tryexcept): a ``tryexcept`` object, or a list of exceptions, to keep the history (see example below)

    **Examples**::

        # Basic usage
        values = [0,1]
        with sc.tryexcept(): # Equivalent to contextlib.suppress(Exception)
            values[2]

        # Raise only certain errors
        with sc.tryexcept(die=IndexError): # Catch everything except IndexError
            values[2]

        # Catch (do not raise) only certain errors, and print full error information
        with sc.tryexcept(catch=IndexError, verbose=2): # Raise everything except IndexError
            values[2]

        # Storing the history of multiple exceptions
        tryexc = None
        for i in range(5):
            with sc.tryexcept(history=tryexc) as tryexc:
                print(values[i])
        tryexc.traceback()

        # With a custom message
        with sc.tryexcept('Caught exception: <EXCEPTION>'):
            values[2]

    | *New in version 2.1.0.*
    | *New in version 3.0.0:* renamed "print" to "traceback"; added "to_df" and "disp" options
    | *New in version 3.1.0:* renamed "exceptions" to "data"; added "exceptions" property
    | *New in version 3.2.3:* "message" argument
    """
    def __init__(self, message=None, die=None, catch=None, verbose=1, history=None):
        # Check die and message
        if message is not None and not isinstance(message, str):
            if die is not None:
                if isinstance(die, str): # Swap order, e.g. sc.tryexcept(KeyError, 'not a key error')
                    message,die = die,message
                else:
                    errormsg = f'Could not understand types for {message =} and {die =}, are you sure the order is correct?'
                    raise TypeError(errormsg)
            else: # Just treat it as die, e.g. sc.tryexcept(KeyError)
                die = message

        # Handle defaults
        dietypes   = []
        catchtypes = []
        if die is None and catch is None: # Default: do not die
            self.defaultdie = False
        elif die in [True, False, 0, 1]: # It's truthy: use it directly # pragma: no cover
            self.defaultdie = die
        elif die is None and catch is not None: # We're asked to catch some things, so die otherwise
            self.defaultdie = True
            catchtypes = tolist(catch)
        elif die is not None and catch is None: # Vice versa
            self.defaultdie = False
            dietypes = tolist(die)
        else: # pragma: no cover
            errormsg = 'Unexpected input to "die" and "catch": typically only one or the other should be provided'
            raise ValueError(errormsg)

        # Finish initialization
        self.dietypes   = tuple(dietypes)
        self.catchtypes = tuple(catchtypes)
        self.verbose    = int(verbose)
        self.message    = sc.ifelse(message, '')
        self.outputstr  = ''
        self.data = []
        if history is not None:
            if isinstance(history, (list, tuple)): # pragma: no cover
                self.data.extend(list(history))
            elif isinstance(history, tryexcept):
                self.data.extend(history.data)
            else: # pragma: no cover
                errormsg = f'Could not understand supplied history: must be a list or a tryexcept object, not {type(history)}'
                raise TypeError(errormsg)
        return

    def __repr__(self):
        return sc.prepr(self)

    def disp(self):
        return sc.pr(self)

    def __len__(self):
        return len(self.data)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, traceback):
        """ If a context manager returns True from exit, the exception is caught """

        if exc_type is not None:
            self.data.append([exc_type, exc_val, traceback])
            die = (self.defaultdie or issubclass(exc_type, self.dietypes))
            live = issubclass(exc_type, self.catchtypes)

            if self.verbose > 1:
                exceptstr = '\n' + self.traceback(tostring=True) + '\n'
            else:
                exceptstr = f'{exc_type} {exc_val}'

            # Construct exit string
            if self.message:
                placeholder = '<EXCEPTION>'
                self.outputstr = self.message
                if placeholder in self.outputstr:
                    self.outputstr = self.outputstr.replace(placeholder, exceptstr)
            else:
                self.outputstr = exceptstr

            if die and not live:
                return
            else:
                if self.verbose: # Print everything # pragma: no cover
                    print(self.outputstr)
                return True


[docs]
    def traceback(self, which=None, tostring=False):
        """
        Print the exception (usually the last)

        Args:
            which (int/list): which exception(s) to print; if None, print all
            tostring (bool): whether to return as a string (otherwise print)

        *New in version 3.1.0:* optionally print multiple tracebacks
        """
        string = ''
        if self.died:
            if which is None:
                which = np.arange(len(self))
            inds = toarray(which)
            for ind in inds:
                string += sc.ansi.green('—'*60 + '\n' + f'Traceback {ind+1} of {len(self)}:\n')
                string += traceback(*self.data[ind]) + '\n'
        else: # pragma: no cover
            print('No exceptions were encountered; nothing to trace')
        if tostring:
            return string
        else:
            print(string)
            return



[docs]
    def to_df(self):
        """ Convert the exceptions to a dataframe; most useful with multiple exceptions """
        df = sc.dataframe(self.data, columns=['type','value','traceback'])
        self.df = df
        return df


    @property
    def exceptions(self):
        """ Retrieve the last exception, if any """
        return [entry[1] for entry in self.data]

    @property
    def exception(self):
        """ Retrieve the last exception, if any """
        if len(self.data): # Exceptions were encountered
            return self.data[-1][1] # This is just the exception
        else:
            return None # Else, return None

    @property
    def died(self):
        """ Whether or not any exceptions were encountered """
        return len(self) > 0