# NIST Public License - 2019
#
# This software was developed by employees of the National Institute of
# Standards and Technology (NIST), an agency of the Federal Government
# and is being made available as a public service. Pursuant to title 17
# United States Code Section 105, works of NIST employees are not subject
# to copyright protection in the United States. This software may be
# subject to foreign copyright. Permission in the United States and in
# foreign countries, to the extent that NIST may hold copyright, to use,
# copy, modify, create derivative works, and distribute this software and
# its documentation without fee is hereby granted on a non-exclusive basis,
# provided that this notice and disclaimer of warranty appears in all copies.
#
# THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND,
# EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED
# TO, ANY WARRANTY THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY
# IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
# AND FREEDOM FROM INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION
# WILL CONFORM TO THE SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE
# ERROR FREE. IN NO EVENT SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING,
# BUT NOT LIMITED TO, DIRECT, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES,
# ARISING OUT OF, RESULTING FROM, OR IN ANY WAY CONNECTED WITH THIS SOFTWARE,
# WHETHER OR NOT BASED UPON WARRANTY, CONTRACT, TORT, OR OTHERWISE, WHETHER
# OR NOT INJURY WAS SUSTAINED BY PERSONS OR PROPERTY OR OTHERWISE, AND
# WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT OF THE RESULTS OF,
# OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
#
"""Parse metadata from FEI tif images (saved by FEI/Thermo Fisher FIBs and SEMs)."""
import configparser
import contextlib
import io
import logging
import re
from decimal import Decimal, InvalidOperation
from math import degrees
from pathlib import Path
from typing import Tuple
from lxml import etree
from nexusLIMS.extractors.utils import _set_instr_name_and_time
from nexusLIMS.utils import set_nested_dict_value, sort_dict, try_getting_dict_value
# Module-level logger named after this module; its level is pinned to INFO here.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
def _detect_and_process_xml_metadata(
metadata_str: str,
) -> Tuple[str, dict]:
"""
Find and (if necessary) parse XML metadata in a Thermo Fisher FIB/SEM TIF file.
Some Thermo Fisher FIB/SEM files have additional metadata embedded as XML at the end
of the TIF file, which cannot be handled by the ``ConfigParser`` implementation of
:py:meth:`get_quanta_metadata`. This method will detect, parse, and remove the XML
from the metadata if present.
Parameters
----------
metadata_str
The metadata at the end of the TIF file as a string. May or may not include
an XML section (this depends on the version of the Thermo software that saved
the image).
Returns
-------
metadata_str
The originally provided metadata as a string, but with the XML portion removed
if it was present
xml_metadata
A dictionary containing the metadata that was present in the XML portion. Will
be an empty dictionary if there was no XML.
"""
xml_regex = re.compile(r'<\?xml version=".+"\?>')
regex_match = xml_regex.search(metadata_str)
if regex_match:
# there is an xml declaration in the metadata of this file, so parse it:
xml_str = metadata_str[regex_match.span()[0] :]
metadata_str = metadata_str[: regex_match.span()[0]]
root = etree.fromstring(xml_str) # noqa: S320
return metadata_str, _xml_el_to_dict(root)
return metadata_str, {}
def _xml_el_to_dict(node: etree.ElementBase):
"""
Convert an lxml.etree node tree into a dict.
This is used to transform the XML metadata section into a dictionary representation
so it can be stored alongside the other metadata.
Taken from https://stackoverflow.com/a/66103841/1435788
"""
result = {}
for element in node.iterchildren():
# Remove namespace prefix
key = element.tag.split("}")[1] if "}" in element.tag else element.tag
# Process element as tree element if the inner XML contains
# non-whitespace content
if element.text and element.text.strip():
value = element.text
else:
value = _xml_el_to_dict(element)
if key in result:
if isinstance(result[key], list):
result[key].append(value) # pragma: no cover
else:
tempvalue = result[key].copy()
result[key] = [tempvalue, value]
else:
result[key] = value
return result
def _fix_duplicate_multigis_metadata_tags(metadata_str: str) -> str:
"""
Rename the metadata section headers to allow parsing by ``ConfigParser``.
Some instruments have metadata section titles like so:
[MultiGIS]
[MultiGISUnit1]
[MultiGISGas1]
[MultiGISGas2]
[MultiGISGas3]
[MultiGISUnit2]
[MultiGISGas1]
[MultiGISGas2]
[MultiGISGas3]
[MultiGISUnit3]
[MultiGISGas1]
[MultiGISGas2]
[MultiGISGas3]
Which causes errors because ``ConfigParser`` raises a ``DuplicateSectionError`` on
the first time it sees a duplicate of ``MultiGISGas1``. As a workaround, this method
will modify an entire string of "Quanta" metadata so that it instead reads like so:
[MultiGIS]
[MultiGISUnit1]
[MultiGISUnit1.MultiGISGas1]
[MultiGISUnit1.MultiGISGas2]
[MultiGISUnit1.MultiGISGas3]
[MultiGISUnit2]
[MultiGISUnit2.MultiGISGas1]
[MultiGISUnit2.MultiGISGas2]
[MultiGISUnit2.MultiGISGas3]
[MultiGISUnit3]
[MultiGISUnit3.MultiGISGas1]
[MultiGISUnit3.MultiGISGas2]
[MultiGISUnit3.MultiGISGas3]
"""
metadata_to_return = ""
multi_gis_section_numbers = re.findall(r"\[MultiGISUnit(\d+)\]", metadata_str)
if multi_gis_section_numbers:
multi_gis_unit_indices = [
metadata_str.index(f"[MultiGISUnit{num}]")
for num in multi_gis_section_numbers
]
metadata_to_return += metadata_str[: multi_gis_unit_indices[0]]
for i, num in enumerate(multi_gis_section_numbers):
if i < len(multi_gis_unit_indices) - 1:
to_process = metadata_str[
multi_gis_unit_indices[i] : multi_gis_unit_indices[i + 1]
]
else:
to_process = metadata_str[multi_gis_unit_indices[i] :]
multi_gis_gas_tags = re.findall(r"\[(MultiGISGas\d+)\]", to_process)
for tag in multi_gis_gas_tags:
to_process = to_process.replace(tag, f"MultiGISUnit{num}.{tag}")
metadata_to_return += to_process
else:
metadata_to_return = metadata_str
return metadata_to_return
def parse_beam_info(mdict, beam_name):
    """
    Parse the "Beam info" section of the metadata.

    Values found in the ``beam_name`` section of ``mdict`` are converted to
    floats (scaled to display-friendly units where noted in the output label)
    and written under the root-level ``nx_meta`` key. Values that are missing
    or empty are skipped.

    Parameters
    ----------
    mdict : dict
        A metadata dictionary as returned by :py:meth:`get_quanta_metadata`
    beam_name : str
        The "beam name" read from the root-level ``Beam`` node of the
        metadata dictionary. If it is ``"not found"``, nothing is parsed.

    Returns
    -------
    mdict : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key

    Raises
    ------
    decimal.InvalidOperation
        If a value that is present and non-empty cannot be parsed as a number.
    """
    if beam_name == "not found":
        return mdict

    # a value is treated as absent if the key was not found or it is empty
    missing_values = ("not found", "")

    # Values are in SI units, but we want easy to display, so include the
    # exponential factor that will get us from input unit (such as seconds)
    # to output unit (such as μs -- meaning factor = 6)
    to_parse = [
        ([beam_name, "EmissionCurrent"], ["Emission Current (μA)"], 6),
        ([beam_name, "HFW"], ["Horizontal Field Width (μm)"], 6),
        ([beam_name, "HV"], ["Voltage (kV)"], -3),
        ([beam_name, "SourceTiltX"], ["Beam Tilt X"], 0),
        ([beam_name, "SourceTiltY"], ["Beam Tilt Y"], 0),
        ([beam_name, "StageR"], ["Stage Position", "R"], 0),
        ([beam_name, "StageTa"], ["Stage Position", "α"], 0),  # noqa: RUF001
        # "StageTb" (β) is intentionally not parsed: all existing quanta
        # images have a value of zero for beta
        ([beam_name, "StageX"], ["Stage Position", "X"], 0),
        ([beam_name, "StageY"], ["Stage Position", "Y"], 0),
        ([beam_name, "StageZ"], ["Stage Position", "Z"], 0),
        ([beam_name, "StigmatorX"], ["Stigmator X Value"], 0),
        ([beam_name, "StigmatorY"], ["Stigmator Y Value"], 0),
        ([beam_name, "VFW"], ["Vertical Field Width (μm)"], 6),
        ([beam_name, "WD"], ["Working Distance (mm)"], 3),
    ]

    for m_in, m_out, factor in to_parse:
        val = try_getting_dict_value(mdict, m_in)
        if val not in missing_values:
            # scale in Decimal so no float rounding error is introduced by
            # the unit conversion itself
            scaled = Decimal(val) * Decimal(str(10**factor))
            set_nested_dict_value(mdict, ["nx_meta", *m_out], float(scaled))

    # Add beam name to metadata:
    set_nested_dict_value(mdict, ["nx_meta", "Beam Name"], beam_name)

    # BeamShiftX and BeamShiftY are only reported when they are non-zero:
    for shift_key, shift_label in (
        ("BeamShiftX", "Beam Shift X"),
        ("BeamShiftY", "Beam Shift Y"),
    ):
        shift_val = try_getting_dict_value(mdict, [beam_name, shift_key])
        if shift_val not in missing_values and Decimal(shift_val) != 0:
            set_nested_dict_value(
                mdict,
                ["nx_meta", shift_label],
                float(Decimal(shift_val)),
            )

    # only parse scan rotation if value is not zero:
    # Not sure what the units of this value are... looks like radians because
    # unique values range from 0 to 6.24811 - convert to degrees for display
    scan_rot_val = try_getting_dict_value(mdict, [beam_name, "ScanRotation"])
    if scan_rot_val not in missing_values and Decimal(scan_rot_val) != 0:
        scan_rot_dec = Decimal(scan_rot_val)
        # get number of digits in Decimal value (so we don't artificially
        # introduce extra precision)
        digits = abs(scan_rot_dec.as_tuple().exponent)
        # round the final float value to that number of digits
        set_nested_dict_value(
            mdict,
            ["nx_meta", "Scan Rotation (°)"],
            round(degrees(scan_rot_dec), digits),
        )

    # TiltCorrectionAngle only if TiltCorrectionIsOn == 'yes'
    tilt_corr_on = try_getting_dict_value(mdict, [beam_name, "TiltCorrectionIsOn"])
    if tilt_corr_on == "yes":
        tilt_corr_val = try_getting_dict_value(
            mdict,
            [beam_name, "TiltCorrectionAngle"],
        )
        if tilt_corr_val not in missing_values:
            set_nested_dict_value(
                mdict,
                ["nx_meta", "Tilt Correction Angle"],
                float(Decimal(tilt_corr_val)),
            )

    return mdict
def parse_scan_info(mdict, scan_name):
    """
    Parse the "Scan info" section of the metadata.

    Parses the `Scan` portion of the metadata dictionary (on a Quanta this is
    always `"EScan"`) to get values such as dwell time, field width, and pixel
    size. Values that are missing or empty are skipped.

    Parameters
    ----------
    mdict : dict
        A metadata dictionary as returned by :py:meth:`get_quanta_metadata`
    scan_name : str
        The name of the section of ``mdict`` that holds the scan values. If it
        is ``"not found"``, nothing is parsed.

    Returns
    -------
    mdict : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key

    Raises
    ------
    decimal.InvalidOperation
        If a value that is present and non-empty cannot be parsed as a number.
    """
    if scan_name == "not found":
        return mdict

    # Values are in SI units, but we want easy to display, so include the
    # exponential factor that will get us from input unit (such as seconds)
    # to output unit (such as μs -- meaning factor = 6)
    to_parse = [
        ([scan_name, "Dwell"], ["Pixel Dwell Time (μs)"], 6),
        ([scan_name, "FrameTime"], ["Total Frame Time (s)"], 0),
        ([scan_name, "HorFieldsize"], ["Horizontal Field Width (μm)"], 6),
        ([scan_name, "VerFieldsize"], ["Vertical Field Width (μm)"], 6),
        # each pixel dimension is reported under the label of the same name
        ([scan_name, "PixelWidth"], ["Pixel Width (nm)"], 9),
        ([scan_name, "PixelHeight"], ["Pixel Height (nm)"], 9),
    ]

    for m_in, m_out, factor in to_parse:
        val = try_getting_dict_value(mdict, m_in)
        if val not in ("not found", ""):
            # scale in Decimal so no float rounding error is introduced by
            # the unit conversion itself
            scaled = Decimal(val) * Decimal(str(10**factor))
            set_nested_dict_value(mdict, ["nx_meta", *m_out], float(scaled))

    return mdict
def parse_det_info(mdict, det_name):
    """
    Parse the "Detector info" section of the metadata.

    Reads the detector's section of the metadata dictionary from the Quanta to
    collect values such as brightness, contrast, signal, etc. Numeric values
    are stored as floats; anything that is not numeric is stored as read.

    Parameters
    ----------
    mdict : dict
        A metadata dictionary as returned by :py:meth:`get_quanta_metadata`
    det_name : str
        The "detector name", i.e. the name of the section of ``mdict`` holding
        the detector values. If it is ``"not found"``, nothing is parsed.

    Returns
    -------
    mdict : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    if det_name == "not found":
        return mdict

    # (key inside the detector section, label written under ``nx_meta``)
    detector_fields = (
        ("Brightness", "Detector Brightness Setting"),
        ("Contrast", "Detector Contrast Setting"),
        ("EnhancedContrast", "Detector Enhanced Contrast Setting"),
        ("Signal", "Detector Signal"),
        ("Grid", "Detector Grid Voltage (V)"),
        ("Setting", "Detector Setting"),
    )

    for section_key, label in detector_fields:
        raw = try_getting_dict_value(mdict, [det_name, section_key])
        if raw == "not found":
            continue

        try:
            numeric = Decimal(raw)
        except (ValueError, InvalidOperation):
            numeric = None

        if numeric is None:
            # not a number, so keep the value exactly as it was read
            set_nested_dict_value(mdict, ["nx_meta", label], raw)
        elif section_key != "Setting":
            # a numeric "Setting" is just the Grid voltage again, so only the
            # other numeric fields are written out
            set_nested_dict_value(mdict, ["nx_meta", label], float(numeric))

    set_nested_dict_value(mdict, ["nx_meta", "Detector Name"], det_name)
    return mdict
def parse_system_info(mdict):
    """
    Parse the "System info" section of the metadata.

    Reads the `System` portion of the metadata dictionary from the Quanta to
    collect values such as software version, chamber config, etc. Nothing is
    added when there is no ``System`` section.

    Parameters
    ----------
    mdict : dict
        A metadata dictionary as returned by :py:meth:`get_quanta_metadata`

    Returns
    -------
    mdict : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    if try_getting_dict_value(mdict, ["System"]) == "not found":
        return mdict

    # values copied over as-is: key in ``System`` -> label under ``nx_meta``
    direct_copies = {
        "Chamber": "Chamber ID",
        "Pump": "Vacuum Pump",
        "SystemType": "System Type",
        "Stage": "Stage Description",
    }
    for system_key, label in direct_copies.items():
        found = try_getting_dict_value(mdict, ["System", system_key])
        if found != "not found":
            set_nested_dict_value(mdict, ["nx_meta", label], found)

    # software name and build number are merged into a single output tag
    software = try_getting_dict_value(mdict, ["System", "Software"])
    build_nr = try_getting_dict_value(mdict, ["System", "BuildNr"])
    version_parts = []
    if software != "not found":
        version_parts.append(software)
    if build_nr != "not found":
        version_parts.append(f"(build {build_nr})")
    if version_parts:
        set_nested_dict_value(
            mdict,
            ["nx_meta", "Software Version"],
            " ".join(version_parts),
        )

    # column and type are likewise merged into a single output tag
    column_candidates = (
        try_getting_dict_value(mdict, ["System", "Column"]),
        try_getting_dict_value(mdict, ["System", "Type"]),
    )
    column_parts = [part for part in column_candidates if part != "not found"]
    if column_parts:
        set_nested_dict_value(
            mdict,
            ["nx_meta", "Column Type"],
            " ".join(column_parts),
        )

    return mdict
def parse_image_info(mdict):
    """
    Parse the "Image info" section of the metadata.

    Reads the `Image` portion of the metadata dictionary from the Quanta to
    collect values such as drift correction, image integration settings, etc.

    Parameters
    ----------
    mdict : dict
        A metadata dictionary as returned by :py:meth:`get_quanta_metadata`

    Returns
    -------
    mdict : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    # drift correction is stored as a boolean that is True only for "On"
    drift = try_getting_dict_value(mdict, ["Image", "DriftCorrected"])
    if drift != "not found":
        set_nested_dict_value(
            mdict,
            ["nx_meta", "Drift Correction Applied"],
            drift == "On",
        )

    # frame integration is only reported when more than one frame was used;
    # a value that is not an integer is silently ignored
    integrate = try_getting_dict_value(mdict, ["Image", "Integrate"])
    if integrate != "not found":
        with contextlib.suppress(ValueError):
            frame_count = int(integrate)
            if frame_count > 1:
                set_nested_dict_value(
                    mdict,
                    ["nx_meta", "Frames Integrated"],
                    frame_count,
                )

    # magnification mode is stored as an int when possible, otherwise as read
    mag_mode = try_getting_dict_value(mdict, ["Image", "MagnificationMode"])
    if mag_mode != "not found":
        try:
            mag_mode_out = int(mag_mode)
        except ValueError:
            mag_mode_out = mag_mode
        set_nested_dict_value(mdict, ["nx_meta", "Magnification Mode"], mag_mode_out)

    # "ResolutionX/Y" give the data size; the conversion stops at the first
    # value that is not an integer, leaving it (and any later one) as read
    x_res = try_getting_dict_value(mdict, ["Image", "ResolutionX"])
    y_res = try_getting_dict_value(mdict, ["Image", "ResolutionY"])
    with contextlib.suppress(ValueError):
        x_res = int(x_res)
        y_res = int(y_res)
    if not (x_res == "not found" or y_res == "not found"):
        set_nested_dict_value(
            mdict,
            ["nx_meta", "Data Dimensions"],
            str((x_res, y_res)),
        )

    return mdict