# Source code for exasol.bucketfs._convert

from __future__ import annotations

import hashlib
from pathlib import Path
from typing import (
    ByteString,
    Iterable,
)


def _chunk_as_bytes(chunk: int | ByteString) -> ByteString:
    """
    Normalize a single chunk to a bytes-like object.

    Iterating over a bytes object yields integers instead of one-byte
    bytes objects:

    >>> [type(chunk) for chunk in b"abc"]
    [<class 'int'>, <class 'int'>, <class 'int'>]

    This wrapper hides that difference from callers.

    Args:
        chunk: either a bytes-like chunk or a single byte given as an int.

    Return:
        The chunk itself if it is iterable, otherwise ``bytes([chunk])``.
    """
    # Anything iterable is passed through untouched.
    if isinstance(chunk, Iterable):
        return chunk
    # A non-iterable value (e.g. an int from iterating bytes) becomes a
    # one-element bytes object.
    return bytes([chunk])


def _bytes(chunks: Iterable[ByteString]) -> ByteString:
    """
    Concatenate all chunks into one mutable byte buffer.

    Args:
        chunks: which shall be concatenated; each one is normalized with
            ``_chunk_as_bytes`` before it is appended.

    Return:
        A ``bytearray`` holding the content of all chunks in order.
    """
    buffer = bytearray()
    for raw in chunks:
        buffer.extend(_chunk_as_bytes(raw))
    return buffer


def as_bytes(chunks: Iterable[ByteString]) -> ByteString:
    """
    Transforms a set of byte chunks into a bytes like object.

    Args:
        chunks: which shall be concatenated.

    Return:
        A single contiguous bytes like object, as produced by ``_bytes``.
    """
    joined = _bytes(chunks)
    return joined


def as_string(chunks: Iterable[ByteString], encoding: str = "utf-8") -> str:
    """
    Transforms a set of byte chunks into a string.

    Args:
        chunks: which shall be converted into a single string.
        encoding: which shall be used to decode the concatenated bytes.

    Return:
        The string obtained by decoding the concatenated chunks.
    """
    raw = _bytes(chunks)
    return raw.decode(encoding)


def as_file(chunks: Iterable[ByteString], filename: str | Path) -> Path:
    """
    Writes a set of byte chunks to a file.

    The file is opened in binary write mode (``"wb"``) and the chunks are
    written in the order in which they are produced.

    Args:
        chunks: which shall be written to file.
        filename: for the file which is to be created.

    Return:
        A path to the created file.
    """
    # Build the lazy chunk iterator before touching the file system, so an
    # invalid ``chunks`` argument fails before any file is created.
    pieces = map(_chunk_as_bytes, chunks)
    target = Path(filename)
    with target.open("wb") as sink:
        sink.writelines(pieces)
    return target


def as_hash(chunks: Iterable[ByteString], algorithm: str = "sha1") -> ByteString:
    """
    Calculate the hash for a set of byte chunks.

    Args:
        chunks: which shall be used as input for the checksum.
        algorithm: name of the hashlib algorithm used for the checksum.

    Return:
        The raw digest bytes (the result of ``digest()``, not a hex string).

    Raises:
        BucketFsError: if ``hashlib.new`` rejects the algorithm name with a
            ``ValueError``.
    """
    # NOTE(review): BucketFsError is not imported in the visible part of
    # this module - confirm it is in scope, otherwise this error path
    # raises NameError instead.
    try:
        digest = hashlib.new(algorithm)
    except ValueError as ex:
        available = ",".join(hashlib.algorithms_available)
        message = (
            "Algorithm ({algorithm}) is not available, please use [{algorithms}]".format(
                algorithm=algorithm, algorithms=available
            )
        )
        raise BucketFsError(message) from ex

    for piece in map(_chunk_as_bytes, chunks):
        digest.update(piece)
    return digest.digest()