Source code for pwkit.io

# -*- mode: python; coding: utf-8 -*-
# Copyright 2012-2016 Peter Williams <peter@newton.cx> and collaborators.
# Licensed under the MIT License.

"""The ``pwkit`` package provides many tools to ease reading and writing data
files. The most generic such tools are located in this module. The most
important tool is the :class:`Path` class for object-oriented navigation of
the filesystem.

"""
from __future__ import absolute_import, division, print_function, unicode_literals

# Names exported by ``from pwkit.io import *``. The literal is wrapped in
# str() because ``unicode_literals`` is enabled above; presumably this keeps
# the exported names as native strings on Python 2 -- TODO confirm.
__all__ = str ('''djoin ensure_dir ensure_symlink make_path_func rellink
                  pathlines pathwords try_open words Path''').split ()

import io, os, pathlib
import six

from . import PKError, text_type

# Type used to convert Path objects before handing them to ``os`` functions:
# the binary string type on Python 2, the text string type otherwise.
path_type = six.binary_type if six.PY2 else six.text_type


# Python 2/3 bytes/unicode compat helpers

def _get_bytes_stream(base):
    """Return the bytes-oriented stream underlying the text stream *base*.

    The first of the attributes ``buffer`` (Python 3 text streams) and
    ``stream`` (the wrapper installed by :func:`pwkit.cli.unicode_stdio`) that
    exists on *base* is returned. If neither exists, *base* itself is returned
    in the hope that it already accepts bytes.

    """
    for attr_name in ('buffer', 'stream'):
        if hasattr(base, attr_name):
            return getattr(base, attr_name)
    return base

def get_stdout_bytes():
    """Return a stream for writing raw bytes to the process' standard output.

    Returns: a file-like object hooked up to the process’ standard output.

    Normally :data:`sys.stdout` is a text stream that accepts Unicode, which
    is what you want for printing messages. When arbitrary binary data must be
    written instead, the byte-based stream underneath has to be used, and
    where it lives depends on the Python version and on whether
    :func:`pwkit.cli.unicode_stdio` has been called. This function looks at
    the current value of :data:`sys.stdout` each time it is called and hands
    back the appropriate underlying stream.

    """
    from sys import stdout
    return _get_bytes_stream(stdout)

def get_stderr_bytes():
    """Return a stream for writing raw bytes to the process' standard error.

    Returns: a file-like object hooked up to the process’ standard error.

    Normally :data:`sys.stderr` is a text stream that accepts Unicode, which
    is what you want for printing messages. When arbitrary binary data must be
    written instead, the byte-based stream underneath has to be used, and
    where it lives depends on the Python version and on whether
    :func:`pwkit.cli.unicode_stdio` has been called. This function looks at
    the current value of :data:`sys.stderr` each time it is called and hands
    back the appropriate underlying stream.

    """
    from sys import stderr
    return _get_bytes_stream(stderr)


# Reading text.

def try_open (*args, **kwargs):
    """Open a file with :func:`io.open`, tolerating a nonexistent path.

    All arguments are forwarded to :func:`io.open`. If it raises an
    :exc:`IOError` whose ``errno`` is 2 (ENOENT), ``None`` is returned
    instead. Any other error is re-raised unchanged.

    """
    try:
        handle = io.open (*args, **kwargs)
    except IOError as e:
        if e.errno != 2: # anything but ENOENT is a genuine failure
            raise
        return None
    return handle


def words (linegen):
    """Generate lists of whitespace-separated words from an iterable of lines.

    For each line in *linegen*, everything from the first ``#`` onwards is
    discarded as a comment and the remainder is split on whitespace. Lines
    that contain no words after this processing are skipped.

    """
    for line in linegen:
        content = line.partition ('#')[0]
        tokens = content.split ()
        if tokens:
            yield tokens


def pathwords (path, mode='rt', noexistok=False, **kwargs):
    """Generate lists of words from each line of the file at *path*.

    The file is opened with ``io.open (path, mode, **kwargs)``. For every
    line, text from the first ``#`` onwards is dropped and the rest is split
    on whitespace; lines yielding no words are skipped.

    If the file cannot be opened because it does not exist (``errno`` 2) and
    *noexistok* is true, nothing is generated. Every other :exc:`IOError` is
    propagated. Because this is a generator, the file is not opened until
    iteration starts.

    """
    try:
        with io.open (path, mode, **kwargs) as stream:
            for text in stream:
                tokens = text.partition ('#')[0].split ()
                if tokens:
                    yield tokens
    except IOError as e:
        if noexistok and e.errno == 2: # ENOENT, and the caller doesn't mind
            return
        raise


def pathlines (path, mode='rt', noexistok=False, **kwargs):
    """Generate the lines of the file at *path*.

    The file is opened with ``io.open (path, mode, **kwargs)`` and iterated
    over, so each generated line keeps whatever line ending the stream
    delivers.

    If the file cannot be opened because it does not exist (``errno`` 2) and
    *noexistok* is true, nothing is generated. Every other :exc:`IOError` is
    propagated. Because this is a generator, the file is not opened until
    iteration starts.

    """
    try:
        with io.open (path, mode, **kwargs) as stream:
            for text in stream:
                yield text
    except IOError as e:
        if noexistok and e.errno == 2: # ENOENT, and the caller doesn't mind
            return
        raise


# Path manipulations -- should largely be superseded by the Path object

def make_path_func (*baseparts):
    """Return a function that joins paths onto some base directory.

    The *baseparts* are combined with :func:`os.path.join` once, up front.
    The returned function accepts any number of path components and joins
    them onto that base.

    """
    root = os.path.join (*baseparts)

    def path_func (*args):
        return os.path.join (root, *args)

    return path_func


def djoin (*args):
    """'dotless' join, for nicer paths.

    Leading components equal to ``''`` or ``'.'`` are dropped and the
    remaining ones are passed to :func:`os.path.join`. If every component is
    dropped (or none were given), ``'.'`` is returned.

    """
    from os.path import join

    for index, piece in enumerate (args):
        if piece != '' and piece != '.':
            # First meaningful component: join from here onwards.
            return join (*args[index:])

    return '.'


# Doing stuff on the filesystem.

def rellink (source, dest):
    """Create a symbolic link at path *dest* that refers to path *source*.

    The text stored in the link is chosen as follows:

    - if *source* is absolute, it is used as-is;
    - otherwise, if *dest* is absolute, the absolute path of *source* is used;
    - otherwise, the path of *source* relative to the directory containing
      *dest* is used, giving a relative link.

    """
    if os.path.isabs (source):
        link_text = source
    elif os.path.isabs (dest):
        link_text = os.path.abspath (source)
    else:
        link_text = os.path.relpath (source, os.path.dirname (dest))

    os.symlink (link_text, dest)


def ensure_dir (path, parents=False):
    """Create the directory *path* unless it is already there.

    Returns a boolean indicating whether the directory already existed: True
    if :func:`os.mkdir` failed with ``errno`` 17 (EEXIST), False if the
    directory was created. Other :exc:`OSError` failures are propagated.

    If *parents* is true, the parent directories are ensured first, in the
    same manner, working upwards until the directory name is empty or stops
    changing.

    """
    if parents:
        upper = os.path.dirname (path)
        if upper and upper != path:
            ensure_dir (upper, True)

    try:
        os.mkdir (path)
    except OSError as e:
        if e.errno != 17: # only EEXIST is tolerated
            raise
        return True
    else:
        return False


def ensure_symlink (src, dst):
    """Ensure the existence of a symbolic link pointing to src named dst.

    Returns a boolean indicating whether something already existed at *dst*:
    True if :func:`os.symlink` failed with ``errno`` 17 (EEXIST), False if the
    link was created. What already exists at *dst* is not inspected. Other
    :exc:`OSError` failures are propagated.

    """
    try:
        os.symlink (src, dst)
    except OSError as e:
        if e.errno != 17: # only EEXIST is tolerated
            raise
        already_there = True
    else:
        already_there = False

    return already_there


# Extended Path object. pathlib.Path objects have fancy __new__ semantics that
# we need to jump through some hoops for: the concrete pathlib class matching
# the running platform (as reported by os.name) is selected here, and the
# Path class defined below derives from it directly.

_ParentPath = pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath

[docs]class Path (_ParentPath):
    """This is an extended version of the :class:`pathlib.Path` class.
    (:mod:`pathlib` is built into Python 3.x and is available as a backport to
    Python 2.x.) It represents a path on the filesystem.

    """
    # Manipulations

[docs]    def expand (self, user=False, vars=False, glob=False, resolve=False):
        """Return a new :class:`Path` with various expansions performed. All
	expansions are disabled by default but can be enabled by passing in
	true values in the keyword arguments.

	user : bool (default False)
	  Expand ``~`` and ``~user`` home-directory constructs. If a username is
	  unmatched or ``$HOME`` is unset, no change is made. Calls
	  :func:`os.path.expanduser`.
	vars : bool (default False)
	  Expand ``$var`` and ``${var}`` environment variable constructs. Unknown
	  variables are not substituted. Calls :func:`os.path.expandvars`.
	glob : bool (default False)
	  Evaluate the path as a :mod:`glob` expression and use the matched path.
	  If the glob does not match anything, do not change anything. If the
	  glob matches more than one path, raise an :exc:`IOError`.
	resolve : bool (default False)
	  Call :meth:`resolve` on the return value before returning it.

        """
        from os import path
        from glob import glob

        text = text_type (self)
        if user:
            text = path.expanduser (text)
        if vars:
            text = path.expandvars (text)
        if glob:
            results = glob (text)
            if len (results) == 1:
                text = results[0]
            elif len (results) > 1:
                raise IOError ('glob of %r should\'ve returned 0 or 1 matches; got %d'
                               % (text, len (results)))

        other = self.__class__ (text)
        if resolve:
            other = other.resolve ()

        return other


[docs]    def format (self, *args, **kwargs):
        """Return a new path formed by calling :meth:`str.format` on the
        textualization of this path.

        """
        return self.__class__ (str (self).format (*args, **kwargs))


[docs]    def get_parent (self, mode='naive'):
        """Get the path of this path’s parent directory.

        Unlike the :attr:`parent` attribute, this function can correctly
        ascend into parent directories if *self* is ``"."`` or a sequence of
        ``".."``. The precise way in which it handles these kinds of paths,
        however, depends on the *mode* parameter:

        ``"textual"``
          Return the same thing as the :attr:`parent` attribute.
        ``"resolved"``
          As *textual*, but on the :meth:`resolve`-d version of the path. This
          will always return the physical parent directory in the filesystem.
          The path pointed to by *self* must exist for this call to succeed.
        ``"naive"``
          As *textual*, but the parent of ``"."`` is ``".."``, and the parent of
          a sequence of ``".."`` is the same sequence with another ``".."``. Note
          that this manipulation is still strictly textual, so results when called
          on paths like ``"foo/../bar/../other"`` will likely not be what you want.
          Furthermore, ``p.get_parent(mode="naive")`` never yields a path equal to
          ``p``, so some kinds of loops will execute infinitely.

        """
        if mode == 'textual':
            return self.parent

        if mode == 'resolved':
            return self.resolve ().parent

        if mode == 'naive':
            from os.path import pardir

            if not len (self.parts):
                return self.__class__ (pardir)
            if all (p == pardir for p in self.parts):
                return self / pardir
            return self.parent

        raise ValueError ('unhandled get_parent() mode %r' % (mode, ))


[docs]    def make_relative (self, other):
        """Return a new path that is the equivalent of this one relative to the path
        *other*. Unlike :meth:`relative_to`, this will not throw an error if *self* is
        not a sub-path of *other*; instead, it will use ``..`` to build a relative
        path. This can result in invalid relative paths if *other* contains a
        directory symbolic link.

        If *self* is an absolute path, it is returned unmodified.

        """
        if self.is_absolute ():
            return self

        from os.path import relpath
        other = self.__class__ (other)
        return self.__class__ (relpath (text_type (self), text_type (other)))


    # Filesystem interrogation

[docs]    def readlink (self):
        """Assuming that this path is a symbolic link, read its contents and
        return them as another :class:`Path` object. An "invalid argument"
        OSError will be raised if this path does not point to a symbolic link.

        """
        return self.__class__ (os.readlink (path_type (self)))


[docs]    def scandir (self):
        """Iteratively scan this path, assuming it’s a directory. This requires and
        uses the :mod:`scandir` module.

        `scandir` is different than `iterdir` because it generates `DirEntry`
        items rather than Path instances. DirEntry objects have their
        properties filled from the directory info itself, so querying them
        avoids syscalls that would be necessary with iterdir().

        The generated values are :class:`scandir.DirEntry` objects which have
        some information pre-filled. These objects have methods ``inode()``,
        ``is_dir()``, ``is_file()``, ``is_symlink()``, and ``stat()``. They
        have attributes ``name`` (the basename of the entry) and ``path`` (its
        full path).

        """
        if hasattr (os, 'scandir'):
            scandir = os.scandir
        else:
            from scandir import scandir

        return scandir (path_type (self))


    # Filesystem modification

[docs]    def copy_to (self, dest, preserve='mode'):
        """Copy this path — as a file — to another *dest*.

        The *preserve* argument specifies which meta-properties of the file
        should be preserved:

        ``none``
          Only copy the file data.
        ``mode``
          Copy the data and the file mode (permissions, etc).
        ``all``
          Preserve as much as possible: mode, modification times, etc.

        The destination *dest* may be a directory.

        Returns the final destination path.

        """
        # shutil.copyfile() doesn't let the destination be a directory, so we
        # have to manage that possibility ourselves.

        import shutil
        dest = Path (dest)

        if dest.is_dir ():
            dest = dest / self.name

        if preserve == 'none':
            shutil.copyfile (str(self), str(dest))
        elif preserve == 'mode':
            shutil.copy (str(self), str(dest))
        elif preserve == 'all':
            shutil.copy2 (str(self), str(dest))
        else:
            raise ValueError ('unrecognized "preserve" value %r' % (preserve,))

        return dest


[docs]    def ensure_dir (self, mode=0o777, parents=False):
        """Ensure that this path exists as a directory.

        This function calls :meth:`mkdir` on this path, but does not raise an
        exception if it already exists. It does raise an exception if this
        path exists but is not a directory. If the directory is created,
        *mode* is used to set the permissions of the resulting directory, with
        the important caveat that the current :func:`os.umask` is applied.

        It returns a boolean indicating if the directory was actually created.

        If *parents* is true, parent directories will be created in the same
        manner.

        """
        if parents:
            p = self.parent
            if p == self:
                return False # can never create root; avoids loop when parents=True
            p.ensure_dir (mode, True)

        made_it = False

        try:
            self.mkdir (mode)
            made_it = True
        except OSError as e:
            if e.errno == 17: # EEXIST?
                return False # that's fine
            raise # other exceptions are not fine

        if not self.is_dir ():
            import errno
            raise OSError (errno.ENOTDIR, 'Not a directory', str(self))

        return made_it


[docs]    def ensure_parent (self, mode=0o777, parents=False):
        """Ensure that this path's *parent* directory exists.

        Returns a boolean whether the parent directory was created. Will
        attempt to create superior parent directories if *parents* is true.

        """
        return self.parent.ensure_dir (mode, parents)


    class _PathTempfileContextManager (object):
        """Context manager that creates a named temporary file on entry and
        disposes of it on exit.

        *reference* is either a path instance (the temporary file is then
        created in the same directory, with a name starting with the
        reference's name) or a path class (the defaults of
        :class:`tempfile.NamedTemporaryFile` are used for the location).
        *want* selects what ``__enter__`` returns (``'handle'`` or ``'path'``),
        *resolution* what ``__exit__`` does with the file (``'unlink'``,
        ``'try_unlink'``, ``'keep'`` or ``'overwrite'``), *suffix* is the
        temporary file's name suffix, and *kwargs* is a dict of extra keyword
        arguments for :class:`tempfile.NamedTemporaryFile`.

        No validation of *want* or *resolution* is done in this class beyond
        the "not reached" assertions below.

        """
        def __init__ (self, reference, want, resolution, suffix, kwargs):
            # Just record the configuration; nothing is created until __enter__.
            self.reference = reference
            self.want = want
            self.resolution = resolution
            self.suffix = suffix
            self.kwargs = kwargs

        def __enter__ (self):
            """Create the temporary file and return either its open handle or
            its path, depending on ``self.want``."""
            from tempfile import NamedTemporaryFile

            # Pretty hacky: we know that we've been called from
            # create_tempfile() when `reference` is a class.

            if isinstance (self.reference, type):
                # No reference path: let tempfile choose the directory.
                dir = None
                prefix = 'tmp'
                refcls = self.reference
            else:
                # Sibling of the reference path, named "<reference name>.<random>".
                dir = str(self.reference.parent)
                prefix = (self.reference.name + '.')
                refcls = self.reference.__class__

            # delete=False: the file must survive being closed, since its
            # fate is decided explicitly in __exit__.
            self.handle = NamedTemporaryFile (
                dir = dir,
                prefix = prefix,
                suffix = self.suffix,
                delete = False,
                **self.kwargs
            )

            # Expose the temporary file's location as a path object, both on
            # this manager and as a `path` attribute of the handle.
            self.temppath = refcls (self.handle.name)
            self.handle.path = self.temppath

            if self.want == 'handle':
                return self.handle

            if self.want == 'path':
                # The caller only wants the name: close the file right away
                # and forget the handle so that __exit__ doesn't close it again.
                self.handle.close ()
                self.handle = None
                return self.temppath

            assert False, 'not reached'

        def __exit__ (self, etype, evalue, etb):
            """Close the handle if it is still held and, unless an exception
            is propagating, apply ``self.resolution`` to the temporary file.
            Always returns False, so exceptions are never suppressed."""
            if self.handle is not None:
                self.handle.close ()

            if etype is not None:
                # On error, keep the tempfile
                return False

            if self.resolution == 'unlink':
                self.temppath.unlink ()
            elif self.resolution == 'try_unlink':
                self.temppath.try_unlink ()
            elif self.resolution == 'keep':
                pass
            elif self.resolution == 'overwrite':
                # Move the finished temporary file over the reference path.
                self.temppath.rename (self.reference)
            else:
                assert False, 'not reached'

            return False


[docs]    def make_tempfile (self, want='handle', resolution='try_unlink', suffix='', **kwargs):
        """Get a context manager that creates and cleans up a uniquely-named temporary
        file with a name similar to this path.

        This function returns a context manager that creates a secure
        temporary file with a path similar to *self*. In particular, if
        ``str(self)`` is something like ``foo/bar``, the path of the temporary
        file will be something like ``foo/bar.ame8_2``.

        The object returned by the context manager depends on the *want* argument:

        ``"handle"``
          An open file-like object is returned. This is the object returned by
          :class:`tempfile.NamedTemporaryFile`. Its name on the filesystem is
          accessible as a string as its `name` attribute, or (a customization here)
          as a :class:`Path` instance as its `path` attribute.
        ``"path"``
          The temporary file is created as in ``"handle"``, but is then immediately
          closed. A :class:`Path` instance pointing to the path of the temporary file is
          instead returned.

        If an exception occurs inside the context manager block, the temporary file is
        left lying around. Otherwise, what happens to it upon exit from the context
        manager depends on the *resolution* argument:

        ``"try_unlink"``
          Call :meth:`try_unlink` on the temporary file — no exception is raised if
          the file did not exist.
        ``"unlink"``
          Call :meth:`unlink` on the temporary file — an exception is raised if
          the file did not exist.
        ``"keep"``
          The temporary file is left lying around.
        ``"overwrite"``
          The temporary file is :meth:`rename`-d to overwrite *self*.

        For instance, when rewriting important files, it’s typical to write
        the new data to a temporary file, and only rename the temporary file
        to the final destination at the end — that way, if a problem happens
        while writing the new data, the original file is left unmodified;
        otherwise you’d be stuck with a partially-written version of the file.
        This pattern can be accomplished with::

          p = Path ('path/to/important/file')
          with p.make_tempfile (resolution='overwrite', mode='wt') as h:
              print ('important stuff goes here', file=h)

        The *suffix* argument is appended to the temporary file name after the
        random portion. It defaults to the empty string. If you want it to
        operate as a typical filename suffix, include a leading ``"."``.

        Other **kwargs** are passed to :class:`tempfile.NamedTemporaryFile`.

        """
        if want not in ('handle', 'path'):
            raise ValueError ('unrecognized make_tempfile() "want" mode %r' % (want,))
        if resolution not in ('unlink', 'try_unlink', 'keep', 'overwrite'):
            raise ValueError ('unrecognized make_tempfile() "resolution" mode %r' % (resolution,))
        return Path._PathTempfileContextManager (self, want, resolution, suffix, kwargs)


[docs]    @classmethod
    def create_tempfile (cls, want='handle', resolution='try_unlink', suffix='', **kwargs):
        if want not in ('handle', 'path'):
            raise ValueError ('unrecognized create_tempfile() "want" mode %r' % (want,))
        if resolution not in ('unlink', 'try_unlink', 'keep'):
            raise ValueError ('unrecognized create_tempfile() "resolution" mode %r' % (resolution,))
        return cls._PathTempfileContextManager (cls, want, resolution, suffix, kwargs)


[docs]    def rellink_to (self, target, force=False):
        """Make this path a symlink pointing to the given *target*, generating the
	proper relative path using :meth:`make_relative`. This gives different
	behavior than :meth:`symlink_to`. For instance, ``Path
	('a/b').symlink_to ('c')`` results in ``a/b`` pointing to the path
	``c``, whereas :meth:`rellink_to` results in it pointing to ``../c``.
	This can result in broken relative paths if (continuing the example)
	``a`` is a symbolic link to a directory.

	If either *target* or *self* is absolute, the symlink will point at
	the absolute path to *target*. The intention is that if you’re trying
	to link ``/foo/bar`` to ``bee/boo``, it probably makes more sense for
	the link to point to ``/path/to/.../bee/boo`` rather than
	``../../../../bee/boo``.

	If *force* is true, :meth:`try_unlink` will be called on *self* before
	the link is made, forcing its re-creation.

        """
        target = self.__class__ (target)

        if force:
            self.try_unlink ()

        if self.is_absolute ():
            target = target.absolute () # force absolute link

        return self.symlink_to (target.make_relative (self.parent))


[docs]    def rmtree (self, errors='warn'):
        """Recursively delete this directory and its contents. The *errors* keyword
        specifies how errors are handled:

        "warn" (the default)
          Print a warning to standard error.
        "ignore"
          Ignore errors.

        """
        import shutil

        if errors == 'ignore':
            ignore_errors = True
            onerror = None
        elif errors == 'warn':
            ignore_errors = False
            from .cli import warn

            def onerror (func, path, exc_info):
                warn ('couldn\'t rmtree %s: in %s of %s: %s', self, func.__name__,
                      path, exc_info[1])
        else:
            raise ValueError ('unexpected "errors" keyword %r' % (errors,))

        shutil.rmtree (text_type (self), ignore_errors=ignore_errors, onerror=onerror)
        return self


[docs]    def try_unlink (self):
        """Try to unlink this path. If it doesn't exist, no error is returned. Returns
        a boolean indicating whether the path was really unlinked.

        """
        try:
            self.unlink ()
            return True
        except OSError as e:
            if e.errno == 2:
                return False # ENOENT
            raise


    # Data I/O

[docs]    def try_open (self, null_if_noexist=False, **kwargs):
        """Call :meth:`Path.open` on this path (passing *kwargs*) and return the
        result. If the file doesn't exist, the behavior depends on
        *null_if_noexist*. If it is false (the default), ``None`` is returned.
        Otherwise, :data:`os.devnull` is opened and returned.

        """
        try:
            return self.open (**kwargs)
        except IOError as e:
            if e.errno == 2:
                if null_if_noexist:
                    import io, os
                    return io.open (os.devnull, **kwargs)
                return None
            raise


[docs]    def as_hdf_store (self, mode='r', **kwargs):
        """Return the path as an opened :class:`pandas.HDFStore` object. Note that the
        :class:`HDFStore` constructor unconditionally prints messages to
        standard output when opening and closing files, so use of this
        function will pollute your program’s standard output. The *kwargs* are
        forwarded to the :class:`HDFStore` constructor.

        """
        from pandas import HDFStore
        return HDFStore (text_type (self), mode=mode, **kwargs)


[docs]    def read_astropy_ascii (self, **kwargs):
        """Open as an ASCII table, returning a :class:`astropy.table.Table` object.
        Keyword arguments are passed to :func:`astropy.io.ascii.open`; valid
        ones likely include:

        - ``names = <list>`` (column names)
        - ``format`` ('basic', 'cds', 'csv', 'ipac', ...)
        - ``guess = True`` (guess table format)
        - ``delimiter`` (column delimiter)
        - ``comment = <regex>``
        - ``header_start = <int>`` (line number of header, ignoring blank and comment lines)
        - ``data_start = <int>``
        - ``data_end = <int>``
        - ``converters = <dict>``
        - ``include_names = <list>`` (names of columns to include)
        - ``exclude_names = <list>`` (names of columns to exclude; applied after include)
        - ``fill_values = <dict>`` (filler values)

        """
        from astropy.io import ascii
        return ascii.read (text_type (self), **kwargs)


[docs]    def read_fits (self, **kwargs):
        """Open as a FITS file, returning a :class:`astropy.io.fits.HDUList` object.
        Keyword arguments are passed to :func:`astropy.io.fits.open`; valid
        ones likely include:

        - ``mode = 'readonly'`` (or "update", "append", "denywrite", "ostream")
        - ``memmap = None``
        - ``save_backup = False``
        - ``cache = True``
        - ``uint = False``
        - ``ignore_missing_end = False``
        - ``checksum = False``
        - ``disable_image_compression = False``
        - ``do_not_scale_image_data = False``
        - ``ignore_blank = False``
        - ``scale_back = False``

        """
        from astropy.io import fits
        return fits.open (text_type (self), **kwargs)


[docs]    def read_fits_bintable (self, hdu=1, drop_nonscalar_ok=True, **kwargs):
        """Open as a FITS file, read in a binary table, and return it as a
        :class:`pandas.DataFrame`, converted with
        :func:`pkwit.numutil.fits_recarray_to_data_frame`. The *hdu* argument
        specifies which HDU to read, with its default 1 indicating the first
        FITS extension. The *drop_nonscalar_ok* argument specifies if
        non-scalar table values (which are inexpressible in
        :class:`pandas.DataFrame`s) should be silently ignored (``True``) or
        cause a :exc:`ValueError` to be raised (``False``). Other **kwargs**
        are passed to :func:`astropy.io.fits.open`, (see
        :meth:`Path.read_fits`) although the open mode is hardcoded to be
        ``"readonly"``.

        """
        from astropy.io import fits
        from .numutil import fits_recarray_to_data_frame as frtdf

        with fits.open (text_type (self), mode='readonly', **kwargs) as hdulist:
            return frtdf (hdulist[hdu].data, drop_nonscalar_ok=drop_nonscalar_ok)


[docs]    def read_hdf (self, key, **kwargs):
        """Open as an HDF5 file using :mod:`pandas` and return the item stored under
        the key *key*. *kwargs* are passed to :func:`pandas.read_hdf`.

        """
        # This one needs special handling because of the "key" and path input.
        import pandas
        return pandas.read_hdf (text_type (self), key, **kwargs)


[docs]    def read_inifile (self, noexistok=False, typed=False):
        """Open assuming an “ini-file” format and return a generator yielding data
        records using either :func:`pwkit.inifile.read_stream` (if *typed* is
        false) or :func:`pwkit.tinifile.read_stream` (if it’s true). The
        latter version is designed to work with numerical data using the
        :mod:`pwkit.msmt` subsystem. If *noexistok* is true, a nonexistent
        file will result in no items being generated rather than an
        :exc:`IOError` being raised.

        """
        if typed:
            from .tinifile import read_stream
        else:
            from .inifile import read_stream

        try:
            with self.open ('rb') as f:
                for item in read_stream (f):
                    yield item
        except IOError as e:
            if e.errno != 2 or not noexistok:
                raise


[docs]    def read_json (self, mode='rt', **kwargs):
        """Use the :mod:`json` module to read in this file as a JSON-formatted data
        structure. Keyword arguments are passed to :func:`json.load`. Returns the
        read-in data structure.

        """
        import json

        with self.open (mode=mode) as f:
            return json.load (f, **kwargs)


[docs]    def read_lines (self, mode='rt', noexistok=False, **kwargs):
        """Generate a sequence of lines from the file pointed to by this path, by
        opening as a regular file and iterating over it. The lines therefore
        contain their newline characters. If *noexistok*, a nonexistent file
        will result in an empty sequence rather than an exception. *kwargs*
        are passed to :meth:`Path.open`.

        """
        try:
            with self.open (mode=mode, **kwargs) as f:
                for line in f:
                    yield line
        except IOError as e:
            if e.errno != 2 or not noexistok:
                raise


[docs]    def read_numpy (self, **kwargs):
        """Read this path into a :class:`numpy.ndarray` using :func:`numpy.load`.
        *kwargs* are passed to :func:`numpy.load`; they likely are:

	mmap_mode : None, 'r+', 'r', 'w+', 'c'
          Load the array using memory-mapping
	allow_pickle : bool = True
          Whether Pickle-format data are allowed; potential security hazard.
	fix_imports : bool = True
	  Try to fix Python 2->3 import renames when loading Pickle-format data.
	encoding : 'ASCII', 'latin1', 'bytes'
	  The encoding to use when reading Python 2 strings in Pickle-format data.

        """
        import numpy as np
        with self.open ('rb') as f:
            return np.load (f, **kwargs)


[docs]    def read_numpy_text (self, dfcols=None, **kwargs):
        """Read this path into a :class:`numpy.ndarray` as a text file using
        :func:`numpy.loadtxt`. In normal conditions the returned array is
        two-dimensional, with the first axis spanning the rows in the file and
        the second axis columns (but see the *unpack* and *dfcols* keywords).

        If *dfcols* is not None, the return value is a
        :class:`pandas.DataFrame` constructed from the array. *dfcols* should
        be an iterable of column names, one for each of the columns returned
        by the :func:`numpy.loadtxt` call. For convenience, if *dfcols* is a
        single string, it will by turned into an iterable by a call to
        :func:`str.split`.

        The remaining *kwargs* are passed to :func:`numpy.loadtxt`; they likely are:

	dtype : data type
	  The data type of the resulting array.
	comments : str
	  If specific, a character indicating the start of a comment.
	delimiter : str
	  The string that separates values. If unspecified, any span of whitespace works.
	converters : dict
	  A dictionary mapping zero-based column *number* to a function that will
	  turn the cell text into a number.
	skiprows : int (default=0)
	  Skip this many lines at the top of the file
	usecols : sequence
	  Which columns keep, by number, starting at zero.
	unpack : bool (default=False)
	  If true, the return value is transposed to be of shape ``(cols, rows)``.
	ndmin : int (default=0)
	  The returned array will have at least this many dimensions; otherwise
	  mono-dimensional axes will be squeezed.

        """
        import numpy as np

        if dfcols is not None:
            kwargs['unpack'] = True

        retval = np.loadtxt (text_type (self), **kwargs)

        if dfcols is not None:
            import pandas as pd
            if isinstance (dfcols, six.string_types):
                dfcols = dfcols.split ()
            retval = pd.DataFrame (dict (zip (dfcols, retval)))

        return retval


[docs]    def read_pandas (self, format='table', **kwargs):
        """Read using :mod:`pandas`. The function ``pandas.read_FORMAT`` is called
        where ``FORMAT`` is set from the argument *format*. *kwargs* are
        passed to this function. Supported formats likely include
        ``clipboard``, ``csv``, ``excel``, ``fwf``, ``gbq``, ``html``,
        ``json``, ``msgpack``, ``pickle``, ``sql``, ``sql_query``,
        ``sql_table``, ``stata``, ``table``. Note that ``hdf`` is not
        supported because it requires a non-keyword argument; see
        :meth:`Path.read_hdf`.

        """
        import pandas

        reader = getattr (pandas, 'read_' + format, None)
        if not callable (reader):
            raise PKError ('unrecognized Pandas format %r: no function pandas.read_%s',
                           format, format)

        with self.open ('rb') as f:
            return reader (f, **kwargs)


[docs]    def read_pickle (self):
        """Open the file, unpickle one object from it using :mod:`pickle`, and return
        it.

        """
        gen = self.read_pickles ()
        value = gen.next ()
        gen.close ()
        return value


[docs]    def read_pickles (self):
        """Generate a sequence of objects by opening the path and unpickling items
        until EOF is reached.

        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        with self.open (mode='rb') as f:
            while True:
                try:
                    obj = pickle.load (f)
                except EOFError:
                    break
                yield obj


[docs]    def read_tabfile (self, **kwargs):
        """Read this path as a table of typed measurements via
        :func:`pwkit.tabfile.read`. Returns a generator for a sequence of
        :class:`pwkit.Holder` objects, one for each row in the table, with
        attributes for each of the columns.

        tabwidth : int (default=8)
            The tab width to assume. Defaults to 8 and should not be changed unless
            absolutely necessary.
        mode : str (default='rt')
            The file open mode, passed to :func:`io.open`.
        noexistok : bool (default=False)
            If true, a nonexistent file will result in no items being generated, as
            opposed to an :exc:`IOError`.
        kwargs : keywords
            Additional arguments are passed to :func:`io.open`.

        """
        from .tabfile import read
        return read (text_type (self), **kwargs)


[docs]    def read_text(self, encoding=None, errors=None, newline=None):
        """Read this path as one large chunk of text.

        This function reads in the entire file as one big piece of text and
        returns it. The *encoding*, *errors*, and *newline* keywords are
        passed to :meth:`open`.

        This is not a good way to read files unless you know for sure that they
        are small.

        """
        with self.open (mode='rt', encoding=encoding, errors=errors, newline=newline) as f:
            return f.read()


[docs]    def read_toml(self, encoding=None, errors=None, newline=None, **kwargs):
        """Read this path as a TOML document.

        The `TOML <https://github.com/toml-lang/toml>`_ parsing is done with
        the :mod:`pytoml` module. The *encoding*, *errors*, and *newline*
        keywords are passed to :meth:`open`. The remaining *kwargs* are passed
        to :meth:`toml.load`.

        Returns the decoded data structure.

        """
        import pytoml

        with self.open (mode='rt', encoding=encoding, errors=errors, newline=newline) as f:
            return pytoml.load (f, **kwargs)


[docs]    def read_yaml (self, encoding=None, errors=None, newline=None, **kwargs):
        """Read this path as a YAML document.

        The YAML parsing is done with the :mod:`yaml` module. The *encoding*,
        *errors*, and *newline* keywords are passed to :meth:`open`. The
        remaining *kwargs* are passed to :meth:`yaml.load`.

        Returns the decoded data structure.

        """
        import yaml

        with self.open (mode='rt', encoding=encoding, errors=errors, newline=newline) as f:
            return yaml.load (f, **kwargs)


[docs]    def write_pickle (self, obj):
        """Dump *obj* to this path using :mod:`cPickle`."""
        self.write_pickles ((obj, ))


[docs]    def write_pickles (self, objs):
        """*objs* must be iterable. Write each of its values to this path in sequence
        using :mod:`cPickle`.

        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        with self.open (mode='wb') as f:
            for obj in objs:
                pickle.dump (obj, f)


[docs]    def write_yaml (self, data, encoding=None, errors=None, newline=None, **kwargs):
        """Read *data* to this path as a YAML document.

        The *encoding*, *errors*, and *newline* keywords are passed to
        :meth:`open`. The remaining *kwargs* are passed to :meth:`yaml.dump`.

        """
        import yaml

        with self.open (mode='wt', encoding=encoding, errors=errors, newline=newline) as f:
            return yaml.dump (data, stream=f, **kwargs)


# Remove the module-level name ``_ParentPath`` so that it is not left in this
# module's namespace. NOTE(review): presumably this is the helper base class
# used when defining ``Path`` earlier in the file -- confirm against its
# definition.
del _ParentPath