Source code for elementMass.elements

import re
from decimal import Decimal
from fractions import Fraction
from typing import Dict, List

import pandas as pd

from .oxidation_states import Oxidation_state
from .Periodic_table import get_periodic_table

periodic_table = get_periodic_table()


def _find_elements(compound: str):
    elements = re.findall("([A-Z][^A-Z]*)", str(compound))

    # Raise an error if no elements are found
    if len(elements) < 1:
        raise ValueError(f"'{compound}' does not contain valid elements")

    for element in elements:
        # Raise an error for invalid elements with more than 1 lower case character
        if sum(c.islower() for c in element) > 1:
            raise ValueError(f"'{element}' is not a valid element")
        # Check for non-word characters
        elif len(re.findall(r"\w|\.", element)) != len(element):
            raise ValueError(f"'{element}' contains an invalid character")

    # Raise an error if there are any leftover characters
    length_elements = sum(len(s) for s in elements)
    if len(compound) != length_elements:
        raise ValueError(
            f"There are leftover characters in '{compound}'; elements found: {elements}"
        )

    return elements


def _find_quantity(element: str):
    element_quantity = re.findall(r"(\D+|\d[^A-Z]*)", element)

    if len(element_quantity) < 2:
        element_quantity.append(1)

    return element_quantity


def _decompose(compound: str):
    elements = [_find_quantity(i) for i in _find_elements(compound)]

    elements_pd = pd.DataFrame(elements, columns=["element", "quantity"]).set_index(
        "element"
    )

    return elements_pd.astype(float).squeeze("columns")


[docs] def calculate_weight(compound: str) -> float: """ Get the atomic mass of a compound or element Parameters ---------- compound : str chemical notation of a compound or element Returns ------- float weight in atomic mass units """ elements = _decompose(compound) return (periodic_table[elements.index] * elements).sum()
[docs] def compound_weights(compounds: List[str]) -> pd.Series: """ Get the atomic mass of a compounds or elements Parameters ---------- compound : list of str chemical notation of compounds or elements Returns ------- pandas.Series weights in atomic mass units """ weights = pd.Series(index=compounds, name="weights", dtype="float64") for i in weights.index: weights[i] = calculate_weight(i) return weights
[docs] def cation_numbers(compounds: List[str]) -> pd.Series: """ Get the cation amount in compounds Parameters ---------- compound : list of str chemical notation of an oxide or element Returns ------- pandas.Series number of cations in each compound """ cations = pd.Series(index=compounds, name="cations", dtype=int) for i in cations.index: cations.loc[i] = _decompose(i).iloc[0] if _is_oxide(i) else 1 return cations
[docs] def oxygen_numbers(compounds: List[str]) -> pd.Series: """ Get the oxygen amount in compounds Parameters ---------- compound : list of str chemical notation of compounds or elements Returns ------- pandas.Series number of oxygen in each compound """ oxygen = pd.Series(index=compounds, name="oxygen", dtype=int) for i in oxygen.index: try: oxygen[i] = _decompose(i)["O"] except KeyError: oxygen[i] = 0 return oxygen
def _get_cation_charge(element: str) -> int: if _is_oxide(element): oxide = _decompose(element) return _get_cation_charge_oxide(oxide) return _get_element_charge(element) def _get_cation_charge_oxide(oxide: pd.Series): if len(oxide) > 2: raise ValueError("oxide contains more than 1 cations") n_O = oxide["O"] n_cations = oxide.iloc[0] charge = int(n_O * 2 / n_cations) return charge def _get_element_charge(element: str): try: charge = int(re.sub("\D", "", element)) except ValueError: charge = Oxidation_state[element] if not (-5 <= charge <= 9): raise ValueError(f"Invalid element charge: {charge}") return charge def _is_oxide(compound: str) -> bool: """ returns true if compound contains more than one element and one of them is oxygen. Does not work for 'Os'. """ elements = re.sub(r"[a-z\d]+", "", compound) # strip lowercase letters and numbers return (len(elements) > 1) & ("O" in elements) def _is_element(compound: str) -> bool: """ returns true if compound is a single element """ elements = re.sub(r"[a-z\d]+", "", compound) # strip lowercase letters and numbers return len(elements) == 1
[docs] def cation_names(compounds: List[str]) -> List: """ Get the name of the first cation for each element in ``compounds`` Parameters ---------- compound : list of str chemical notation of compounds or elements Returns ------- list names of cations """ is_oxide = [_is_oxide(c) for c in compounds] # is_element = [_is_element(c) for c in compounds] element_names = [ _decompose(c).index[0] if ox else re.sub(r"\d+", "", c) for c, ox in zip(compounds, is_oxide) ] charges = [_get_cation_charge(c) for c in compounds] names = [ (e if Oxidation_state[e] == charge else f"{e}{int(charge)}") for e, charge in zip(element_names, charges) ] return names
[docs] def get_oxide_name(ion: str) -> str: """ Convert an ion name to oxide. If ion charge is not included in the ion name, ElementMass's default oxidation states are used. Parameters ---------- ion : str ion name, charge can optionally be included in the name e.g. 'Fe3' Returns ------- str name of oxide """ try: charge = float(re.sub(r"\D", "", ion)) except ValueError: charge = Oxidation_state[ion] if charge < 0: return ion ratio = Fraction(Decimal(charge / 2)) n_cation = int(ratio.denominator) if ratio.denominator > 1 else "" n_O = int(ratio.numerator) if ratio.numerator > 1 else "" ion_name = re.sub(r"\d+", "", ion) return f"{ion_name}{n_cation}O{n_O}"
def get_oxide_names(ions: List[str]) -> List: return [get_oxide_name(ion=ion) for ion in ions]