1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3

'''
Functions for working with CSV files.
'''

import csv
import logging
import pathlib


# Module-level logger, named after this module via __name__.
LOGGER = logging.getLogger(__name__)


class CSVWrapper():
    '''
    Wrapper around a CSV file to facilitate appending rows in real-time via an
    iterable of dictionaries.

    The first row of the file is always treated as the header row. Extra
    keyword arguments passed to the constructor are forwarded to every csv
    reader and writer created by this class (e.g. delimiter, quotechar), so
    reading and writing always agree on the dialect.
    '''
    ENCODING = 'utf-8'

    def __init__(self, path, **kwargs):
        '''
        Args:
            path:
                Path to the CSV file. It is resolved to an absolute path but
                does not need to exist yet.

            **kwargs:
                Formatting parameters forwarded to the csv readers and
                writers.
        '''
        self.path = pathlib.Path(path).resolve()
        self.kwargs = kwargs

    def _open(self, mode):
        '''
        Open the CSV file in text mode with the class encoding.

        newline='' is what the csv module documentation requires so that the
        csv reader/writer handles line endings itself (avoids doubled carriage
        returns on Windows and broken embedded newlines in quoted fields).
        '''
        return self.path.open(mode, encoding=self.ENCODING, newline='')

    @property
    def headers(self):
        '''
        The current headers from the CSV file, or None if they do not exist.

        This assumes that the first row contains the headers. None is returned
        both when the file is missing and when it contains no rows.
        '''
        try:
            with self._open('r') as csvfile:
                return next(csv.reader(csvfile, **self.kwargs), None)
        except FileNotFoundError:
            return None

    def __iter__(self):
        '''
        Iterate over the data rows as dicts keyed by the header row.

        Yields nothing if the file does not exist.
        '''
        try:
            with self._open('r') as csvfile:
                # Use the same formatting parameters as the writer so that
                # files written with a custom dialect can be read back.
                yield from csv.DictReader(csvfile, **self.kwargs)
        except FileNotFoundError:
            return

    def write_rows(self, rows, headers=None, append=False):
        '''
        Write or append rows to the CSV file, flushing after every row.

        Args:
            rows:
                An iterable (generator, list, ...) of dicts.

            headers:
                The field names to use. If None, the sorted keys of the first
                row are used. Ignored (with a warning if different) when
                appending to a file that already has a header row.

            append:
                If True, append to the existing file and re-use its headers.
                If False, the file is overwritten.
        '''
        # Accept any iterable, not only iterators, since next() is used below.
        rows = iter(rows)
        if headers is not None:
            headers = list(headers)

        existing_headers = self.headers if append else None
        if existing_headers is not None:
            if headers is not None and headers != existing_headers:
                LOGGER.warning('Re-using existing headers: %s', existing_headers)
            headers = existing_headers

        first_row = None
        if headers is None:
            # No headers known yet: peek at the first row to derive them.
            try:
                first_row = next(rows)
            except StopIteration:
                LOGGER.warning('Attempting to append 0 rows to %s', self.path)
            else:
                headers = sorted(first_row)

        self.path.parent.mkdir(parents=True, exist_ok=True)
        with self._open('a' if append else 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers, **self.kwargs)

            # The header row is needed whenever the file does not already
            # start with one, including when headers were passed explicitly.
            if existing_headers is None and headers is not None:
                writer.writeheader()
                csvfile.flush()

            if first_row is not None:
                writer.writerow(first_row)
                csvfile.flush()

            # Flush per row so that readers see the rows in real-time.
            for row in rows:
                writer.writerow(row)
                csvfile.flush()