Python input/output

From wikinotes

Concepts

Most of Python's input/output centers around file descriptors, pipes, and other stream objects. The idea is that a stream can be consumed incrementally: the entire stream does not need to be loaded into memory before you start reading from it.

stream objects

You generally need to open a stream object for reading/writing. Here are the ways you can do so:

'w'           # write
'r'           # read
'a'           # append

# modifiers
'r+'          # read + write
'rb', 'wb'    # (b) indicates read or write raw bytes


Once it is open, you can work with it:

fd.read()          # read from the current position to the end of the stream
fd.write('text')   # write to the stream
fd.seek(5)         # move to offset 5 in the stream
fd.tell()          # return the current position in the stream (does not print anything)

# iterating a stream yields it one line at a time
for line in fd:
    print(line)

files

# Read: iterate over the file object itself so that only one line is held in
# memory at a time (fd.readlines() would first load the whole file into a list).
with open('/path/to/file.txt', 'r') as fd:
    for line in fd:
        print(line)

# Write: the with-block closes (and therefore flushes) the file on exit.
with open('/path/to/file.txt', 'w') as fd:
    fd.write('awesome')

stdin/stdout/stderr

When something is piped to your Python program, it is exposed as sys.stdin.

sys.stdin
sys.stdout
sys.stderr

Reading from stdin when there is no input blocks indefinitely. You can check whether input was piped or redirected to stdin (rather than stdin being an interactive terminal) using the following (works on both Windows and Linux -- tested).

import sys

# isatty() is True when stdin is an interactive terminal, i.e. nothing was piped in
print('no stdin input' if sys.stdin.isatty() else 'stdin can be read from')

wrap/modify stdin/stdout/stderr in subprocess

#!/usr/bin/env python

import enum
import os
import re
import select
import subprocess
import sys


class AnsiColour(enum.Enum):
    """ The 8 basic ANSI colours.

    Each member's value is the final digit of the ``ESC[3<n>m`` escape
    sequence that `colourize` emits for it.
    """
    BLACK = 0
    RED = 1
    GREEN = 2
    YELLOW = 3
    BLUE = 4
    MAGENTA = 5
    CYAN = 6
    WHITE = 7

    def reset(self):
        """ Returns the escape sequence that restores default formatting.
        """
        return "\033[0m"

    def colourize(self, text):
        """ Returns text wrapped in this colour's escape sequence, followed by a reset.
        """
        start = "\033[3{}m".format(self.value)
        return start + text + self.reset()


class MiddlewareBase:
    """ Common interface for middleware.

    Subclasses are expected to override `apply`; the base implementation
    always raises.
    """
    def apply(self, text):
        """ Transforms text. Not implemented on the base class.
        """
        raise NotImplementedError


class ColourizeFilenamesMiddleware(MiddlewareBase):
    """ Middleware that colourizes ``/<name>.rb:<line>`` references found in text.
    """
    def __init__(self, colour):
        """ Choose an `AnsiColour` enum value to colourize filenames in.
        """
        self.colour = colour

    def apply(self, text):
        """ Returns text with every ``/<name>.rb:<line-number>`` match colourized.

        Only the last path component (from the final ``/``) up to and
        including the line number is wrapped in colour.
        """
        # The dot is escaped so that only a literal ".rb" extension matches;
        # unescaped it matched any character (e.g. "/fooXrb:12").
        # Raw strings keep the regex and the \1 back-reference readable.
        return re.sub(r'(/[^/]+\.rb:[0-9]+)', self.colour.colourize(r'\1'), text)


class WrapSubprocess:
    """ Relays a subprocess' stdin/stdout/stderr through the current process.

    Subprocess output can be rewritten by 'middleware' objects before it is
    written to this process' own stdout/stderr.
    """
    def wrap(self, pipe, stdout_middleware=None, stderr_middleware=None):
        """ Wraps a subprocess.Popen(), allowing you to modify its stdout/stderr with lists of 'middleware' objects.

        This process' stdin is forwarded unmodified to the subprocess.

        Args:
            pipe: a subprocess.Popen whose stdin, stdout and stderr are all pipes.
            stdout_middleware: optional list of objects with an apply(text) method,
                applied in order to everything the subprocess writes to stdout.
            stderr_middleware: same as stdout_middleware, but for stderr.

        Does not return: calls sys.exit() with the subprocess' return code
        once the subprocess has exited.

        Note: this uses select.select() on pipe file descriptors, which the
        Python documentation describes as unsupported on Windows.
        """
        stdin_fd = sys.stdin.fileno()
        stdout_fd = pipe.stdout.fileno()
        stderr_fd = pipe.stderr.fileno()
        watched = [stdin_fd, stdout_fd, stderr_fd]

        while True:
            # exit when subprocess exits
            if pipe.poll() is not None:
                # output written just before exit may still sit in the pipes;
                # forward it first so it is not lost
                self._drain(pipe.stdout, sys.stdout, stdout_middleware)
                self._drain(pipe.stderr, sys.stderr, stderr_middleware)
                sys.exit(pipe.returncode)

            readable, _, _ = select.select(watched, [], [])

            if stdout_fd in readable:
                self._read_stream_to(pipe.stdout, sys.stdout, stdout_middleware)

            if stderr_fd in readable:
                self._read_stream_to(pipe.stderr, sys.stderr, stderr_middleware)

            if stdin_fd in readable:
                if not self._write_stream_to(sys.stdin.buffer, pipe.stdin):
                    # Our stdin reached end-of-file. select() would keep
                    # reporting it readable (a busy loop), so stop watching it
                    # and close the subprocess' stdin so it sees EOF as well.
                    watched.remove(stdin_fd)
                    pipe.stdin.close()

    def _drain(self, input, output, middleware_stack=None):
        """ Forwards whatever is still pending on file-object input to output, without blocking.
        """
        # timeout=0 makes select() a non-blocking poll
        while select.select([input.fileno()], [], [], 0)[0]:
            if not self._read_stream_to(input, output, middleware_stack):
                break  # readable but empty: end of stream

    def _read_stream_to(self, input, output, middleware_stack=None):
        """ Repeats the bytes currently available on file-object input to file-object output.

        The bytes are decoded as UTF-8 and passed through middleware_stack
        before being written as text.

        Returns:
            True if anything was forwarded, False if no bytes were available.
        """
        text = input.peek()   # preview available bytes
        input.read(len(text)) # consume exactly the bytes that were previewed
        if not text:
            return False

        # errors="replace": a chunk can end in the middle of a multi-byte
        # character, or the subprocess can emit bytes that are not UTF-8;
        # neither should crash the wrapper.
        text = self._apply_middleware(middleware_stack, text.decode("utf-8", errors="replace"))
        output.write(text)
        output.flush()
        return True

    def _write_stream_to(self, input, output):
        """ Repeats the bytes currently available on file-object input to binary file-object output.

        Returns:
            True if anything was forwarded, False if no bytes were available.
        """
        text = input.peek()   # preview available bytes
        input.read(len(text)) # consume exactly the bytes that were previewed
        if not text:
            return False

        if isinstance(text, bytes):
            output.write(text)
        else:
            output.write(text.encode("utf-8"))
        output.flush()
        return True

    def _apply_middleware(self, middleware_stack, text):
        """ Calls apply(text) method on each `Middleware`, which returns a new text object.

        Middleware is applied in list order; an empty or None stack returns
        text unchanged.

        Example:

            .. code-block:: python

                class Prefix:
                    def apply(self, text):
                        return "a" + text

                _apply_middleware([Prefix(), Prefix()], "foo")
                >>> "aafoo"
        """
        if not middleware_stack:
            return text

        for middleware in middleware_stack:
            text = middleware.apply(text)
        return text


if __name__ == '__main__':
    # The command to wrap comes from the command line rather than being
    # hard-coded to one machine's paths, e.g.:
    #   <this-script> rails test path/to/some_test.rb
    cmds = sys.argv[1:]
    if not cmds:
        # sys.exit() with a string prints it to stderr and exits with status 1
        sys.exit("usage: {} COMMAND [ARG ...]".format(sys.argv[0]))

    wrapper = WrapSubprocess()
    middleware_stack = [ColourizeFilenamesMiddleware(AnsiColour.RED)]

    # all three standard streams are pipes so that wrap() can relay them
    pipe = subprocess.Popen(cmds, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=os.environ)
    wrapper.wrap(
        pipe,
        stdout_middleware=middleware_stack,
        stderr_middleware=middleware_stack
    )

io.StringIO

Sometimes it is useful to operate on a fake file-descriptor that lives in memory.

import io
# io.StringIO is an in-memory text stream (there is no io.StreamIO)
fd = io.StringIO()
fd.write('abc\n')
fd.write('def\n')

# getvalue() returns everything written so far; it must be called before close()
print(fd.getvalue())
# abc
# def

fd.close()

sockets

You can read/write sockets. See python networking.

pprint

import pprint

sample = {'a': 1, 'b': 2}

# module-level helper, default settings
pprint.pprint(sample)

# reusable printer object with custom settings
pp = pprint.PrettyPrinter(width=1, indent=2)
pp.pprint(sample)