Skip to content

make_data_extract.py

Get the categories and associated XLSFiles from the config file.

Returns:

Type Description
list

A list of the XLSForms included in osm-fieldwork

Source code in osm_fieldwork/make_data_extract.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def getChoices():
    """Get the categories and associated XLSFiles from the config file.

    Reads ``category.yaml`` from ``data_models_path``. Every top-level entry
    is expected to be a mapping with exactly one key; that key is stored
    against the first element of its value.

    Returns:
        (dict): Each key found in category.yaml mapped to the first item of
            its value. Empty when the file does not exist or has no entries.

    Raises:
        ValueError: If an entry does not contain exactly one key/value pair.
    """
    data = {}
    category_file = f"{data_models_path}/category.yaml"
    if not os.path.exists(category_file):
        return data

    # Use a context manager so the file handle is closed deterministically.
    with open(category_file, "r") as file:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects.
        # Acceptable only while this file is a trusted, packaged config;
        # consider yaml.safe_load if plain mappings/lists are sufficient.
        contents = yaml.load(file.read(), Loader=yaml.Loader)

    # An empty YAML document loads as None, so fall back to no entries.
    for entry in contents or []:
        [[k, v]] = entry.items()
        data[k] = v[0]
    return data

options: show_source: false heading_level: 3

Bases: object

Class to handle SQL queries for the categories.

Parameters:

Name Type Description Default
dburi str

The URI string for the database connection

required
config str

The filespec for the query config file

required
xlsfile str

The filespec for the XLSForm file

required

Returns:

Type Description
MakeExtract

An instance of this object

Source code in osm_fieldwork/make_data_extract.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def __init__(
    self,
    dburi: str,
    config: str,
    xlsfile: str,
):
    """Initialize the postgres handler.

    Creates the database client, loads the XLSForm into memory and builds
    the query config.

    Args:
        dburi (str): The URI string for the database connection
        config (str): The name of the query config; it is resolved to
            ``{data_models_path}/{config}.yaml`` for the database client
        xlsfile (str): The filespec for the XLSForm file. A value containing
            "/" is opened as given, otherwise it is looked up under
            ``xlsforms_path``

    Returns:
        (MakeExtract): An instance of this object
    """
    self.db = PostgresClient(dburi, f"{data_models_path}/{config}.yaml")

    # Read in the XLSFile. A bare filename is resolved against the bundled
    # XLSForms directory; anything containing a "/" is treated as a path.
    xls_path = xlsfile if "/" in xlsfile else f"{xlsforms_path}/{xlsfile}"
    # Read the whole file into memory inside a context manager so the
    # underlying file handle is closed instead of leaking.
    with open(xls_path, "rb") as file:
        self.xls = BytesIO(file.read())
    self.config = QueryConfig(config)

getFeatures

getFeatures(boundary, polygon)

Extract features from Postgres.

Parameters:

Name Type Description Default
boundary str

The filespec for the project AOI in GeoJson format

required
filespec str

The optional output file for the query

required
polygon bool

Whether to have the full geometry or just centroids returned

required

Returns:

Type Description
FeatureCollection

The features returned from the query

Source code in osm_fieldwork/make_data_extract.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def getFeatures(
    self,
    boundary: FeatureCollection,
    polygon: bool,
):
    """Extract features from Postgres.

    Args:
        boundary (FeatureCollection): The project AOI as GeoJSON. Either a
            collection with a "features" list (the first feature's geometry
            is used) or an object with a top-level "geometry" key
        polygon (bool): Whether to have the full geometry or just centroids
            returned. NOTE(review): accepted but not used in this method

    Returns:
        (FeatureCollection): The features returned from the query, or None
            when the query result is empty/falsy
    """
    log.info("Extracting features from Postgres...")

    # Pick the AOI geometry from the first feature of a collection, or
    # directly from the object when there is no "features" key.
    aoi_geometry = boundary["features"][0]["geometry"] if "features" in boundary else boundary["geometry"]
    # The result is discarded; presumably this only checks that the geometry
    # can be parsed -- TODO confirm.
    shape(aoi_geometry)

    result = self.db.execQuery(boundary, None, False)
    return result if result else None

cleanFeatures

cleanFeatures(collection)

Filter out any data not in the data_model.

Parameters:

Name Type Description Default
collection bytes

The input data or filespec to the input data file

required

Returns:

Type Description
FeatureCollection

The modified data

Source code in osm_fieldwork/make_data_extract.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def cleanFeatures(
    self,
    collection: FeatureCollection,
):
    """Filter out any data not in the data_model.

    A fresh FilterData is parsed from the XLSForm and query config held on
    this instance, then applied to the input collection.

    Args:
        collection (FeatureCollection): The input data to be filtered

    Returns:
        (FeatureCollection): The modified data

    """
    log.debug("Cleaning features")
    data_filter = FilterData()
    data_filter.parse(self.xls, self.config)
    return data_filter.cleanData(collection)

options: show_source: false heading_level: 3