Skip to content

filter_data.py

Bases: object

Returns:

Type Description
FilterData

An instance of this object

Source code in osm_fieldwork/filter_data.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def __init__(
    self,
    filespec: str = None,
    config: QueryConfig = None,
):
    """Create an empty filter and optionally load an XLSForm right away.

    Args:
        filespec (str): The optional data file to read.
        config (QueryConfig): The optional query configuration. The
            data file is only parsed when both this and filespec
            are supplied.

    Returns:
        (FilterData): An instance of this object
    """
    self.qc = config
    self.tags = {}
    # Parsing needs both the file and the configuration; with either
    # one missing the instance simply starts out empty.
    ready_to_parse = bool(filespec) and bool(config)
    if not ready_to_parse:
        return
    self.parse(filespec, config)

parse

parse(filespec, config)

Read in the XLSForm and extract the data we want.

Parameters:

Name Type Description Default
filespec str

The filespec to the XLSForm file

required

Returns:

Name Type Description
title str

The title from the XLSForm settings sheet

extract str

The data extract filename from the XLSForm Survey sheet

Source code in osm_fieldwork/filter_data.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def parse(
    self,
    filespec: str,
    config: QueryConfig,
):
    """Read in the XLSForm and extract the data we want.

    As a side effect this fills self.tags (a mapping of each
    "list_name" to the list of "name" values found for it on the
    second sheet) and sets self.keep (the columns to retain).

    Args:
        filespec (str): The filespec to the XLSForm file
        config (QueryConfig): The query configuration. When truthy it
            replaces self.qc; otherwise the existing self.qc is used.

    Returns:
        title (str): The title from the XLSForm Setting sheet
        extract (str): The data extract filename from the XLSForm Survey
            sheet. This is "none" when the sheet has entries but none of
            them is a select_one_from_file, and "" when the "type"
            column has no entries at all.
    """
    if config:
        self.qc = config

    # Read the workbook once. Sheets are addressed by position:
    # 0 supplies the "type" column, 1 the "list_name"/"name" columns
    # and 2 the "form_title" column.
    entries = pd.read_excel(filespec, sheet_name=[0, 1, 2])
    title = entries[2]["form_title"].to_list()[0]

    # Look for the data extract. A match must survive any
    # non-matching rows that follow it, so "none" is only assigned
    # once the whole column has been scanned without a hit.
    extract = ""
    saw_type = False
    found = False
    for entry in entries[0]["type"]:
        if str(entry) == "nan":
            continue
        saw_type = True
        text = str(entry)
        if text.startswith("select_one_from_file"):
            # Skip the keyword and the single separator after it.
            extract = text[21:]
            found = True
            log.info(f'Got data extract filename: "{extract}", title: "{title}"')
    if saw_type and not found:
        extract = "none"

    # NOTE(review): the first row of this sheet has always been
    # skipped here; confirm that this is intentional.
    choices = entries[1]
    names = choices["list_name"].iloc[1:]
    values = choices["name"].iloc[1:]
    for key, value in zip(names, values):
        if key == "model" or str(key) == "nan":
            continue
        # Empty cells come back from pandas as NaN, not as "null".
        if value == "<text>" or str(value) == "null" or pd.isna(value):
            continue
        self.tags.setdefault(key, []).append(value)

    # The yaml config file for the query has a list of columns
    # to keep in addition to this default set. These wind up
    # in the SELECT
    self.keep = [
        "name",
        "name:en",
        "id",
        "operator",
        "addr:street",
        "addr:housenumber",
        "osm_id",
        "title",
        "tags",
        "label",
        "landuse",
        "opening_hours",
        "tourism",
    ]
    # Test for the "keep" key in the configuration itself, not inside
    # the list it maps to, and tolerate a missing configuration.
    settings = getattr(self.qc, "config", None)
    if settings and "keep" in settings:
        extra = settings["keep"]
        if extra:
            self.keep.extend(extra)

    return title, extract

cleanData

cleanData(data)

Filter out any data not in the data_model.

Parameters:

Name Type Description Default
data bytes

The input data or filespec to the input data file

required

Returns:

Type Description
FeatureCollection

The modified data

Source code in osm_fieldwork/filter_data.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def cleanData(
    self,
    data,
):
    """Normalize the properties of every feature in a GeoJSON collection.

    Each feature's "tags" mapping is flattened into its properties,
    "osm_id" is copied to "id", and a "title" and "label" are set from
    a name-like key, or from the id when no name is present.

    NOTE: filtering against the data model (self.tags) is currently
    disabled; every property is passed through.

    Args:
        data (bytes): The input data or filespec to the input data file.
            A str is treated as the path of a GeoJSON file, and the
            result is also written next to it as "new-<filename>".
            Bytes are decoded and evaluated. Anything else is used
            as the already-loaded collection.

    Returns:
        (FeatureCollection): The modified data

    Raises:
        FileExistsError: If data is a path and the output file already
            exists.
    """
    import os

    log.debug("Cleaning data...")
    outpath = None
    if isinstance(data, str):
        # Prefix the file name, not the whole path, so inputs that
        # live in another directory get a valid output location.
        folder, filename = os.path.split(data)
        outpath = os.path.join(folder, f"new-{filename}")
        with open(data, "r", encoding="utf-8") as infile:
            indata = geojson.load(infile)
    elif isinstance(data, bytes):
        # SECURITY: eval() executes arbitrary code. Only pass bytes
        # from a trusted source. FIXME: switch to a real parser once
        # the producers of this payload are confirmed to emit JSON.
        indata = eval(data.decode())
    else:
        indata = data

    collection = []
    for feature in indata["features"]:
        properties = {}
        for key, value in feature["properties"].items():
            if key == "tags":
                # Flatten the nested tags into the top level.
                for k, v in value.items():
                    if k.startswith("name"):
                        properties["title"] = v
                        properties["label"] = v
                    else:
                        properties[k] = v
            elif key == "osm_id":
                properties["id"] = value
                properties["title"] = value
                properties["label"] = value
            else:
                properties[key] = value
                if key.startswith("name"):
                    properties["title"] = value
                    properties["label"] = value
        if "title" not in properties:
            if "id" in properties:
                properties["label"] = properties["id"]
                properties["title"] = properties["id"]
            else:
                # Keep the feature instead of aborting the whole run.
                log.warning("Feature has no name or id, so no title was set!")
        collection.append(Feature(geometry=feature["geometry"], properties=properties))

    result = FeatureCollection(collection)
    if outpath is not None:
        # "x" refuses to overwrite an existing output file.
        with open(outpath, "x", encoding="utf-8") as outfile:
            geojson.dump(result, outfile)
    return result

options: show_source: false heading_level: 3