Skip to content

NIST parser

Module used to download and present attenuation and composition data from the NIST website.

HTMLTableParser

Bases: HTMLParser

This class serves as a html table parser. It is able to parse multiple tables which you feed in. You can access the result per .tables field.

Source code in pygrpm/material/nistparser/nistparser.py
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
class HTMLTableParser(HTMLParser):
    """This class serves as a html table parser. It is able to parse multiple
    tables which you feed in. You can access the result per .tables field.
    """

    # pylint: disable=abstract-method
    # pylint has issues with the ParserBase.error() method,
    # which is not relevant here

    # pylint: disable=too-many-instance-attributes
    # The 8 instances are assumed to be meaningful here
    def __init__(self, decode_html_entities=False, data_separator=" "):
        HTMLParser.__init__(self)

        self._parse_html_entities = decode_html_entities
        self._data_separator = data_separator

        self._in_td = False
        self._in_th = False
        self._current_table = []
        self._current_row = []
        self._current_cell = []
        self.tables = []

    def handle_starttag(self, tag, attrs):
        """We need to remember the opening point for the content of interest.
        The other tags (<table>, <tr>) are only handled at the closing point.
        """
        if tag == "td":
            self._in_td = True
        if tag == "th":
            self._in_th = True

    def handle_data(self, data):
        """This is where we save content to a cell"""
        if self._in_td or self._in_th:
            self._current_cell.append(data.strip())

    def handle_charref(self, name):
        """Handle HTML encoded characters"""

        if self._parse_html_entities:
            self.handle_data(unescape(f"&#{name};"))

    def handle_endtag(self, tag):
        """Here we exit the tags. If the closing tag is </tr>, we know that we
        can save our currently parsed cells to the current table as a row and
        prepare for a new row. If the closing tag is </table>, we save the
        current table and prepare for a new one.
        """
        if tag == "td":
            self._in_td = False
        elif tag == "th":
            self._in_th = False

        if tag in ["td", "th"]:
            final_cell = self._data_separator.join(self._current_cell).strip()
            self._current_row.append(final_cell)
            self._current_cell = []
        elif tag == "tr":
            self._current_table.append(self._current_row)
            self._current_row = []
        elif tag == "table":
            self.tables.append(self._current_table)
            self._current_table = []

handle_charref(name)

Handle HTML encoded characters

Source code in pygrpm/material/nistparser/nistparser.py
470
471
472
473
474
def handle_charref(self, name):
    """Handle HTML encoded characters"""

    if self._parse_html_entities:
        self.handle_data(unescape(f"&#{name};"))

handle_data(data)

This is where we save content to a cell

Source code in pygrpm/material/nistparser/nistparser.py
465
466
467
468
def handle_data(self, data):
    """This is where we save content to a cell"""
    if self._in_td or self._in_th:
        self._current_cell.append(data.strip())

handle_endtag(tag)

Here we exit the tags. If the closing tag is , we know that we can save our currently parsed cells to the current table as a row and prepare for a new row. If the closing tag is , we save the current table and prepare for a new one.

Source code in pygrpm/material/nistparser/nistparser.py
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
def handle_endtag(self, tag):
    """Here we exit the tags. If the closing tag is </tr>, we know that we
    can save our currently parsed cells to the current table as a row and
    prepare for a new row. If the closing tag is </table>, we save the
    current table and prepare for a new one.
    """
    if tag == "td":
        self._in_td = False
    elif tag == "th":
        self._in_th = False

    if tag in ["td", "th"]:
        final_cell = self._data_separator.join(self._current_cell).strip()
        self._current_row.append(final_cell)
        self._current_cell = []
    elif tag == "tr":
        self._current_table.append(self._current_row)
        self._current_row = []
    elif tag == "table":
        self.tables.append(self._current_table)
        self._current_table = []

handle_starttag(tag, attrs)

We need to remember the opening point for the content of interest. The other tags (

, ) are only handled at the closing point.

Source code in pygrpm/material/nistparser/nistparser.py
456
457
458
459
460
461
462
463
def handle_starttag(self, tag, attrs):
    """We need to remember the opening point for the content of interest.
    The other tags (<table>, <tr>) are only handled at the closing point.
    """
    if tag == "td":
        self._in_td = True
    if tag == "th":
        self._in_th = True

get_atomic_number(composition)

Returns the atomic number equivalent to the first element match in the provided string

Parameters:

Name Type Description Default
composition string

The key representing the element, e.g: "H"

required

Returns:

Type Description
int

The atomic number

Source code in pygrpm/material/nistparser/nistparser.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def get_atomic_number(composition: str) -> int:
    """
    Returns the atomic number equivalent to the first element
    match in the provided string
    Parameters
    ----------
    composition : string
        The key representing the element, e.g: "H"

    Returns
    -------
    int
        The atomic number
    """
    element = formula(composition)
    element_list = [elem.number for elem, numFrac in element.atoms.items()]

    if len(element_list) > 1:
        print(
            f"Warning, submitted more than one element, only returning {list(element.atoms)[0]}"
        )
    return [elem.number for elem, numFrac in element.atoms.items()][0]

get_attenuations(composition, energies, option='total', verbose=0, outopt='PIC')

Retrieves the attenuation of a material in cm^2/g at given energies on the NIST website.

Parameters:

Name Type Description Default
composition dict or str or Formula

The composition of the material. If it is a dict, it must be of the form {"formula": Relative weight}. Ex : {"Cu":0.5, "H2O":0.3, "C6H12O6":0.2} if it is a string, it must be a formula. Ex: "C6H12O6" if it is a periodictable.formulas.Formula, the mass fraction of each composing elements is extracted.

required
energies sequence of float

The energies at which the attenuations are computed, given in keV

required
option string

One of: "total", "ph", "comp", or "ray" indicating attenuation type

'total'
verbose int

The verbose level of the function. If it is greater than 0, prints at each step.

0
outopt str

If atomic number is chosen then Mass Attenuation Coefficient is returned by default (PIC), if "CSBA", then atomic cross section is returned instead.

'PIC'

Raises:

Type Description
URLError

If the data request failed more than NISTparser.MAX_NETWORK_TRIES times. At each fail, it waits NISTparser.WAIT_TIME seconds before trying again.

ValueError

If composition is not of a known type (dict, str or periodictable.formulas.Formula)

Returns:

Type Description
list of float

The total mass-attenuation with coherent scattering values for the material at each energies, in cm^2/g

Source code in pygrpm/material/nistparser/nistparser.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
def get_attenuations(
    composition: Union[str, Dict, int, Formula],
    energies: Sequence[float],
    option: str = "total",
    verbose: int = 0,
    outopt: str = "PIC",
) -> List:
    """Retrieves the attenuation of a material in cm^2/g
    at given energies on the NIST website.

    Parameters
    ----------
    composition : dict or str or periodictable.formulas.Formula
        The composition of the material. If it is a dict,
        it must be of the form {"formula": Relative weight}.
            Ex : `{"Cu":0.5, "H2O":0.3, "C6H12O6":0.2}`
        if it is a string, it must be a formula. Ex: `"C6H12O6"`
        if it is a `periodictable.formulas.Formula`, the mass fraction of each
        composing elements is extracted.
    energies : sequence of float
        The energies at which the attenuations are computed, given in keV
    option : string
        One of: "total", "ph", "comp", or "ray" indicating attenuation type
    verbose : int, optional
        The verbose level  of the  function.
        If it is greater than 0, prints at each step.
    outopt: str, optional
        If atomic number is chosen then Mass Attenuation Coefficient
            is returned by default (PIC),
        if "CSBA", then atomic cross section is returned instead.

    Raises
    ------
    URLError
        If the data request failed more than `NISTparser.MAX_NETWORK_TRIES`
        times. At each fail, it waits `NISTparser.WAIT_TIME` seconds
        before trying again.
    ValueError
        If `composition` is not of a known type
        (dict, str or periodictable.formulas.Formula)

    Returns
    -------
    list of float
        The total mass-attenuation with coherent scattering values
        for the material at each energies, in cm^2/g
    """
    # Selects attenuation component:
    # ph: photoelectric; comp: compton; total: total
    options_dict = {
        "ph": {
            "value": 4,
            "type": "Photoelectric",
        },
        "comp": {
            "value": 3,
            "type": "Compton (incoherent)",
        },
        "ray": {"value": 2, "type": "Rayleigh (coherent)"},
        "total": {
            "value": -2,
            "type": "Total",
        },
    }

    # Select our index for NIST query and print details if required
    component_index = options_dict[option]["value"]
    if verbose > 0:
        print(f"{options_dict[option]['type']} component selected")

    # NIST has a limit at approx 112 energies
    if len(energies) > MAX_ENERGY_LENGTH:
        if verbose > 0:
            print(
                f"NIST Parser: Too many energies ({len(energies)} >"
                f"{MAX_ENERGY_LENGTH}), dividing."
            )
        all_mus = []
        for index in range(ceil(len(energies) / MAX_ENERGY_LENGTH)):
            all_mus += get_attenuations(
                composition,
                energies[index * MAX_ENERGY_LENGTH : (index + 1) * MAX_ENERGY_LENGTH],
                option=option,
                verbose=verbose,
                outopt=outopt,
            )
        return all_mus

    # Mixture dict (Formula : Relative weight)
    if isinstance(composition, dict):
        url = "https://physics.nist.gov/cgi-bin/Xcom/xcom3_3"
        post_data = dict(
            POST_DATA,
            Formulae="\n".join([f"{k} {v:.6f}" for k, v in composition.items()]),
            Energies="\n".join([f"{e / 1000:0.4f}" for e in energies]),
        )
    elif isinstance(composition, str):  # Formula string
        url = "https://physics.nist.gov/cgi-bin/Xcom/xcom3_2"
        post_data = dict(
            POST_DATA,
            Formula=composition,
            Energies="\n".join([f"{e / 1000:0.4f}" for e in energies]),
        )

    # Periodicttable.formulas.Formula object
    elif isinstance(composition, Formula):
        return get_attenuations(
            {
                elem.symbol: massFrac
                for elem, massFrac in composition.mass_fraction.items()
            },
            energies,
            option=option,
            verbose=verbose,
        )

    elif isinstance(composition, int):  # Atomic number of element
        url = "https://physics.nist.gov/cgi-bin/Xcom/xcom3_1"
        post_data = dict(
            POST_DATA,
            ZNum=composition,
            Energies="\n".join([f"{e / 1000:0.4f}" for e in energies]),
            OutOpt=outopt,
        )
    else:
        raise ValueError(f"Composition value of type {type(composition)} invalid.")

    html = _perform_request(url, post_data, verbose)

    nist_parser = HTMLTableParser()
    nist_parser.feed(html)
    try:
        return [
            float(muOverRho[component_index]) for muOverRho in nist_parser.tables[0][3:]
        ]
    except IndexError:
        raise (
            URLError(f"There was a problem getting the data. We received:\n{html}")
        ) from IndexError

get_composition(material, verbose=0)

Method used to obtain the material composition listed at https://physics.nist.gov/cgi-bin/Star/compos.pl?refer=ap This method expects an enum object from the NISTMaterial class in ./materials_enum.py

Parameters:

Name Type Description Default
material enum

An enum option from the NISTMaterials class of the material whose composition and details should be looked up

required
verbose int

The verbose level of the function. If it is greater than 0, prints at each step.

0

Raises:

Type Description
URLError

If the data request failed more than NISTparser.MAX_NETWORK_TRIES times. At each fail, it waits NISTparser.WAIT_TIME seconds before trying again.

ValueError

If material is not of type NISTMaterials

Returns:

Type Description
dictionary

Itemized relative mass, mass density, mean excitation energy, and reference url

Source code in pygrpm/material/nistparser/nistparser.py
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
def get_composition(material: NISTMaterials, verbose: int = 0) -> Dict:
    """
    Method used to obtain the material composition listed at
    https://physics.nist.gov/cgi-bin/Star/compos.pl?refer=ap
    This method expects an enum object from the NISTMaterial class
    in ./materials_enum.py
    Parameters
    ----------
    material : enum
        An enum option from the NISTMaterials class of the material
        whose composition and details should be looked up
    verbose : int, optional
        The verbose level  of the  function.
        If it is greater than 0, prints at each step.

    Raises
    ------
    URLError
        If the data request failed more than `NISTparser.MAX_NETWORK_TRIES`
        times. At each fail, it waits `NISTparser.WAIT_TIME` seconds
        before trying again.
    ValueError
        If `material` is not of type NISTMaterials

    Returns
    -------
    dictionary
        Itemized relative mass, mass density,
        mean excitation energy, and reference url
    """

    if isinstance(material, NISTMaterials) is False:
        raise ValueError("Material parameter is not an instance of NISTMaterial")

    url = (
        f"https://physics.nist.gov/cgi-bin/Star/"
        f"compos.pl?refer=ap&matno={material.value}"
    )
    html = _perform_request(url, None, verbose)

    nist_parser = HTMLTableParser()
    nist_parser.feed(html)

    # in g/cm^3 & eV respectively
    density, mean_excitation_energy = [val[1] for val in nist_parser.tables[0]]

    comp_table = nist_parser.tables[1][2:]
    # The first entry contains both the header and entries, so purge that
    comp_table[0] = comp_table[0][2:]

    # Nasty way of getting number to symbol
    symbol_conversion = {el.number: el.symbol for el in elements}

    # Replace atomic number with text representation
    comp_table = [
        (symbol_conversion[int(key)], float(value)) for key, value in comp_table
    ]

    composition = dict(comp_table)

    return {
        "relative mass": composition,
        "mass density": float(density),
        "mean excitation energy": float(mean_excitation_energy),
        "reference url": url,
    }

get_cross_sections(composition, energies, option='total', verbose=0)

Retrieves the desired cross sections of an element at given energies on the NIST website in (barns/electron), barn=10^-24cm^2.

Parameters:

Name Type Description Default
composition str or int (atomic number) representing the desired element
required
energies sequence of float

The energies at which the attenuations are computed, given in keV

required
option string

One of: "total", "ph", "comp", or "ray" indicating attenuation type

'total'
verbose int

The verbose level of the function. If it is greater than 0, prints at each step.

0

Raises:

Type Description
URLError

If the data request failed more than NISTparser.MAX_NETWORK_TRIES times. At each fail, it waits NISTparser.WAIT_TIME seconds before trying again.

ValueError

If composition is not of a known type (dict, str or periodictable.formulas.Formula)

Returns:

Type Description
list of float

The desired type of cross section values for the material at each energies, (barns/electron), barn=10^-24cm^2

Source code in pygrpm/material/nistparser/nistparser.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def get_cross_sections(
    composition: Union[int, str],
    energies: Sequence[float],
    option: str = "total",
    verbose: int = 0,
) -> List:
    """Retrieves the desired cross sections of an element at given energies
     on the NIST website in (barns/electron), barn=10^-24cm^2.

    Parameters
    ----------
    composition : str or int (atomic number) representing the desired element
    energies : sequence of float
        The energies at which the attenuations are computed, given in keV
    option : string
        One of: "total", "ph", "comp", or "ray" indicating attenuation type
    verbose : int, optional
        The verbose level  of the  function.
        If it is greater than 0, prints at each step.

    Raises
    ------
    URLError
        If the data request failed more than `NISTparser.MAX_NETWORK_TRIES`
        times. At each fail, it waits `NISTparser.WAIT_TIME` seconds
        before trying again.
    ValueError
        If `composition` is not of a known type
        (dict, str or periodictable.formulas.Formula)

    Returns
    -------
    list of float
        The desired type of cross section values for the material
         at each energies, (barns/electron), barn=10^-24cm^2

    """
    atomic_number = (
        composition if isinstance(composition, int) else get_atomic_number(composition)
    )
    return get_attenuations(
        atomic_number, energies, option=option, verbose=verbose, outopt="CSBA"
    )

get_electronic_cross_sections(composition, energies, option='total', verbose=0)

Retrieves the electronic cross sections of an element at given energies on the NIST website in (barns/electron), barn=10^-24cm^2.

Parameters:

Name Type Description Default
composition str or int (atomic number) representing the desired element
required
energies sequence of float

The energies at which the attenuations are computed, given in keV

required
option string

One of: "total", "ph", "comp", or "ray" indicating attenuation type

'total'
verbose int

The verbose level of the function. If it is greater than 0, prints at each step.

0

Raises:

Type Description
URLError

If the data request failed more than NISTparser.MAX_NETWORK_TRIES times. At each fail, it waits NISTparser.WAIT_TIME seconds before trying again.

ValueError

If composition is not of a known type (dict, str or periodictable.formulas.Formula)

Returns:

Type Description
list of float

The total electronic cross section values for the material at each energies, (barns/electron), barn=10^-24cm^2

Source code in pygrpm/material/nistparser/nistparser.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
def get_electronic_cross_sections(
    composition: Union[int, str],
    energies: Sequence[float],
    option: str = "total",
    verbose: int = 0,
) -> List:
    """Retrieves the electronic cross sections of an element
    at given energies on the NIST website
    in (barns/electron), barn=10^-24cm^2.

    Parameters
    ----------
    composition : str or int (atomic number) representing the desired element
    energies : sequence of float
        The energies at which the attenuations are computed, given in keV
    option : string
        One of: "total", "ph", "comp", or "ray" indicating attenuation type
    verbose : int, optional
        The verbose level  of the  function.
        If it is greater than 0, prints at each step.

    Raises
    ------
    URLError
        If the data request failed more than `NISTparser.MAX_NETWORK_TRIES`
        times. At each fail, it waits `NISTparser.WAIT_TIME` seconds
        before trying again.
    ValueError
        If `composition` is not of a known type
        (dict, str or periodictable.formulas.Formula)

    Returns
    -------
    list of float
        The total electronic cross section values for the material
        at each energies, (barns/electron), barn=10^-24cm^2
    """
    atomic_number = (
        composition if isinstance(composition, int) else get_atomic_number(composition)
    )
    cross_section = get_cross_sections(
        composition, energies, option=option, verbose=verbose
    )
    return [cross / atomic_number for cross in cross_section]