Configuration of the GeoipEnricher

The goal of this presentation is to introduce the configuration of the output subfields of the GeoipEnricher.

Prerequisites: a local GeoIP database is available.

The challenge

The given document

[19]:
# Minimal input event: the only information present is the client's IP address.
document = {
    "client": {
        "ip": "8.8.8.8",
    },
}

will result in the default output

[20]:
# Result of the enrichment when no target subfields are customized: the input
# event is kept as-is and the geo information is added as a GeoJSON-style
# "Feature" object under the top-level "geoip" key.
default_output = {
    # Unchanged input field (must be a dict with key "ip", matching `document`).
    "client": {"ip": "8.8.8.8"},
    "geoip": {
        "geometry": {"coordinates": [-97.822, 37.751], "type": "Point"},
        "properties": {
            "accuracy_radius": 1000,
            "continent": "North America",
            "continent_code": "NA",
            "country": "United States",
            "country_iso_code": "US",
            "time_zone": "America/Chicago",
        },
        "type": "Feature",
    },
}

which instead should be configured to look like

[21]:
# Desired shape of the enriched event: all geo values live under "client.geo"
# instead of the default top-level "geoip" object. The trailing comments name
# the default subfield each value is taken from (see the rule's
# customize_target_subfields mapping).
expected_output = {
    "client": {
        "geo": {
            "accuracy": 1000,  # properties.accuracy_radius
            "continent_code": "NA",  # properties.continent_code
            "continent_name": "North America",  # properties.continent
            "country_iso_code": "US",  # properties.country_iso_code
            "country_name": "United States",  # properties.country
            "geometry_type": "Point",  # geometry.type
            "location": [-97.822, 37.751],  # geometry.coordinates
            "timezone": "America/Chicago",  # properties.time_zone
            "type": "Feature",  # type
        },
        "ip": "8.8.8.8",  # original input field, left untouched
    }
}

Create rule and processor

create the rule:

[22]:
import sys
import tempfile
from pathlib import Path

# Put the directory five levels up on the import path so that `logprep`
# can be imported from the notebook's location.
sys.path.append("../../../../../")

# Rule definition: matches events that contain "client.ip", uses that field as
# the lookup source and redirects every default geoip subfield to a custom
# dotted target path below "client.geo".
rule_yaml = """---
filter: "client.ip"
geoip_enricher:
  source_fields: ["client.ip"]
  customize_target_subfields:
    type: client.geo.type
    geometry.type: client.geo.geometry_type
    geometry.coordinates: client.geo.location
    properties.accuracy_radius: client.geo.accuracy
    properties.continent: client.geo.continent_name
    properties.continent_code: client.geo.continent_code
    properties.country: client.geo.country_name
    properties.city: client.geo.city_name
    properties.postal_code: client.geo.postal_code
    properties.subdivision: client.geo.subdivision
    properties.time_zone: client.geo.timezone
    properties.country_iso_code: client.geo.country_iso_code
"""

# Store the rule in a "geoip" directory inside the system temp directory.
# The write stays the last expression so the cell still shows the number of
# characters written.
rule_path = Path(tempfile.gettempdir(), "geoip")
rule_path.mkdir(exist_ok=True)
rule_file = rule_path.joinpath("data-stream.yml")
rule_file.write_text(rule_yaml)

[22]:
678

Create the processor config and replace the db_path with the path to your local GeoIP database:

[23]:
# Configuration for a single processor; the outer key is the processor's name.
processor_config = {
    "geoip_enricher": {
        "type": "geoip_enricher",
        # Rule locations: the temp directory that holds the rule file created
        # earlier, plus "/dev".
        # NOTE(review): the purpose of the "/dev" entry is not evident from
        # this notebook - confirm whether it is still required.
        "rules": [str(rule_path), "/dev"],
        # Placeholder - must be replaced with the path to an existing local
        # GeoIP database file, otherwise creating the processor fails with an
        # InvalidConfigurationError.
        "db_path": "<INSERT_PATH_TO_GEOIP_DATABASE>"
    }
}

create the processor with the factory:

[24]:
from unittest import mock
from logprep.factory import Factory

# NOTE(review): mock_logger is created but not passed to Factory.create below;
# the recorded traceback shows an older call that took it as second argument.
# Confirm whether it is still needed.
mock_logger = mock.MagicMock()
# Build the processor from the configuration dict.
geoip_enricher = Factory.create(processor_config)
# Last expression of the cell, so the notebook displays the created processor.
geoip_enricher

---------------------------------------------------------------------------
InvalidConfigurationError                 Traceback (most recent call last)
Cell In[24], line 5
      2 from logprep.factory import Factory
      4 mock_logger = mock.MagicMock()
----> 5 geoip_enricher = Factory.create(processor_config, mock_logger)
      6 geoip_enricher

File ~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/factory.py:36, in Factory.create(cls, configuration, logger)
     34     metric_labels = configuration[connector_name].pop("metric_labels")
     35 connector = Configuration.get_class(connector_name, connector_configuration_dict)
---> 36 connector_configuration = Configuration.create(
     37     connector_name, connector_configuration_dict
     38 )
     39 connector_configuration.metric_labels = copy.deepcopy(metric_labels)
     40 return connector(connector_name, connector_configuration, logger)

File ~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/configuration.py:34, in Configuration.create(cls, name, config_)
     19 """factory method to create component configuration
     20
     21 Parameters
   (...)
     31     the pipeline component configuration
     32 """
     33 class_ = cls.get_class(name, config_)
---> 34 return class_.Config(**config_)

File <attrs generated init logprep.processor.geoip_enricher.processor.GeoipEnricher.Config>:13, in __init__(self, type, rules, tree_config, db_path)
     11 __attr_validator_generic_rules(self, __attr_generic_rules, self.generic_rules)
     12 __attr_validator_tree_config(self, __attr_tree_config, self.tree_config)
---> 13 __attr_validator_db_path(self, __attr_db_path, self.db_path)

File ~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/util/validators.py:53, in url_validator(_, attribute, value)
     51     raise InvalidConfigurationError(f"{attribute.name} has no schema, net location and path")
     52 if not parsed_url.scheme and not parsed_url.netloc and parsed_url.path:
---> 53     file_validator(_, attribute, value)
     54 if parsed_url.scheme == "file":
     55     if parsed_url.params or parsed_url.query or parsed_url.fragment:

File ~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/util/validators.py:23, in file_validator(_, attribute, value)
     21     raise InvalidConfigurationError(f"{attribute.name} is not a str")
     22 if not os.path.exists(value):
---> 23     raise InvalidConfigurationError(f"{attribute.name} file '{value}' does not exist")
     24 if not os.path.isfile(value):
     25     raise InvalidConfigurationError(f"{attribute.name} '{value}' is not a file")

InvalidConfigurationError: db_path file 'tests/testdata/mock_external/MockGeoLite2-City.mmdb' does not exist

Process event

[ ]:
from copy import deepcopy

# Work on a deep copy so the original `document` stays unchanged.
mydocument = deepcopy(document)
# The return value is not used: the check below relies on process() having
# modified `mydocument` in place.
geoip_enricher.process(mydocument)
# Fails loudly if the customized target subfields do not produce the expected layout.
assert mydocument == expected_output
# Last expression of the cell, shown as the cell's output.
f"The output has the expected form: {mydocument == expected_output}"
'The output has the expected form: True'