{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configuration of the GeoipEnricher\n",
    "\n",
    "This presentation's goal is to introduce the configuration of the output subfields of the `GeoipEnricher`.\n",
    "\n",
    "Prerequisites: a local geo ip database is available"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### The challenge\n",
    "\n",
    "The given document"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "document = {\"client\": {\"ip\": \"8.8.8.8\"}}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "will result in the default output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "default_output = {\n",
    "    \"client\": {\"ip\": \"8.8.8.8\"},\n",
    "    \"geoip\": {\n",
    "        \"geometry\": {\"coordinates\": [-97.822, 37.751], \"type\": \"Point\"},\n",
    "        \"properties\": {\n",
    "            \"accuracy_radius\": 1000,\n",
    "            \"continent\": \"North America\",\n",
    "            \"continent_code\": \"NA\",\n",
    "            \"country\": \"United States\",\n",
    "            \"country_iso_code\": \"US\",\n",
    "            \"time_zone\": \"America/Chicago\",\n",
    "        },\n",
    "        \"type\": \"Feature\",\n",
    "    },\n",
    "}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "which instead should be configured to look like"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "expected_output = {\n",
    "    \"client\": {\n",
    "        \"geo\": {\n",
    "            \"accuracy\": 1000,\n",
    "            \"continent_code\": \"NA\",\n",
    "            \"continent_name\": \"North America\",\n",
    "            \"country_iso_code\": \"US\",\n",
    "            \"country_name\": \"United States\",\n",
    "            \"geometry_type\": \"Point\",\n",
    "            \"location\": [-97.822, 37.751],\n",
    "            \"timezone\": \"America/Chicago\",\n",
    "            \"type\": \"Feature\",\n",
    "        },\n",
    "        \"ip\": \"8.8.8.8\",\n",
    "    }\n",
    "}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create rule and processor\n",
    "create the rule:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count":
22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "678"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append(\"../../../../../\")\n",
    "from pathlib import Path\n",
    "import tempfile\n",
    "\n",
    "\n",
    "rule_yaml = \"\"\"---\n",
    "filter: \"client.ip\"\n",
    "geoip_enricher:\n",
    "  source_fields: [\"client.ip\"]\n",
    "  customize_target_subfields: \n",
    "    type: client.geo.type\n",
    "    geometry.type: client.geo.geometry_type\n",
    "    geometry.coordinates: client.geo.location\n",
    "    properties.accuracy_radius: client.geo.accuracy\n",
    "    properties.continent: client.geo.continent_name\n",
    "    properties.continent_code: client.geo.continent_code\n",
    "    properties.country: client.geo.country_name\n",
    "    properties.city: client.geo.city_name\n",
    "    properties.postal_code: client.geo.postal_code\n",
    "    properties.subdivision: client.geo.subdivision\n",
    "    properties.time_zone: client.geo.timezone\n",
    "    properties.country_iso_code: client.geo.country_iso_code\n",
    "\"\"\"\n",
    "\n",
    "rule_path = Path(tempfile.gettempdir()) / \"geoip\"\n",
    "rule_path.mkdir(exist_ok=True)\n",
    "rule_file = rule_path / \"data-stream.yml\"\n",
    "rule_file.write_text(rule_yaml)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "create the processor config and replace the `db_path` with your local geo ip database:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "processor_config = {\n",
    "    \"geoip_enricher\": {\n",
    "        \"type\": \"geoip_enricher\",\n",
    "        \"rules\": [str(rule_path), \"/dev\"],\n",
    "        \"db_path\": \"\"  # placeholder: set this to the path of your local geo ip database\n",
    "    }\n",
    "}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "create the processor with the factory:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from unittest import mock\n",
    "from logprep.factory import Factory\n",
    "\n",
    "mock_logger = mock.MagicMock()\n",
    "geoip_enricher = Factory.create(processor_config)\n",
    "geoip_enricher\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Process event"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'The output has the expected form: True'"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from copy import deepcopy\n",
    "\n",
    "mydocument = deepcopy(document)\n",
    "geoip_enricher.process(mydocument)\n",
    "assert mydocument == expected_output\n",
    "f\"The output has the expected form: {mydocument == expected_output}\""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.11.0 ('.venv': venv)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}