From 5e2562f7f4f3fef1a28273577fd099f13661a9d1 Mon Sep 17 00:00:00 2001 From: Matthew Newton Date: Mon, 18 Jan 2016 17:41:49 +0000 Subject: [PATCH] Update elasticsearch fields to use "_" rather than "." It seems that while elasticsearch can have data in a field as well as sub-fields, logstash can't currently do this (it stores the event in a hash, so an entry either contains data or another hash or list). Work around this by using underscores, which should in theory also mean these examples will work with elasticsearch 2, though I haven't tried it. --- doc/schemas/logstash/kibana3-dashboard.json | 6 +-- doc/schemas/logstash/kibana4-dashboard.json | 4 +- doc/schemas/logstash/logstash-radius.conf | 42 ++++++++++----------- doc/schemas/logstash/radius-mapping.sh | 21 ++++++++--- 4 files changed, 41 insertions(+), 32 deletions(-) diff --git a/doc/schemas/logstash/kibana3-dashboard.json b/doc/schemas/logstash/kibana3-dashboard.json index 694105975b0..498a1c866f6 100644 --- a/doc/schemas/logstash/kibana3-dashboard.json +++ b/doc/schemas/logstash/kibana3-dashboard.json @@ -290,7 +290,7 @@ }, "tmode": "terms_stats", "tstat": "max", - "valuefield": "Acct-Output-Octets.long", + "valuefield": "Acct-Output-Octets_long", "title": "TopN data Output" }, { @@ -326,7 +326,7 @@ }, "tmode": "terms_stats", "tstat": "max", - "valuefield": "Acct-Input-Octets.long", + "valuefield": "Acct-Input-Octets_long", "title": "TopN Data Input" } ], @@ -473,4 +473,4 @@ "hide": false }, "refresh": false -} \ No newline at end of file +} diff --git a/doc/schemas/logstash/kibana4-dashboard.json b/doc/schemas/logstash/kibana4-dashboard.json index f3cb850a862..eb7930e9535 100644 --- a/doc/schemas/logstash/kibana4-dashboard.json +++ b/doc/schemas/logstash/kibana4-dashboard.json @@ -97,7 +97,7 @@ "_type": "visualization", "_source": { "title": "RADIUS table topN data transferred by User-Name", - "visState": 
"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"Acct-Output-Octets.long\"}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"User-Name\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"Acct-Input-Octets.long\"}}],\"listeners\":{}}", + "visState": "{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"Acct-Output-Octets_long\"}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"User-Name\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"Acct-Input-Octets_long\"}}],\"listeners\":{}}", "description": "", "savedSearchId": "RADIUS-data", "version": 1, @@ -120,4 +120,4 @@ } } } -] \ No newline at end of file +] diff --git a/doc/schemas/logstash/logstash-radius.conf b/doc/schemas/logstash/logstash-radius.conf index a1d9f6a7233..8b277a005f3 100644 --- a/doc/schemas/logstash/logstash-radius.conf +++ b/doc/schemas/logstash/logstash-radius.conf @@ -104,8 +104,8 @@ filter { # down into components then do that here and add # the data as sub-fields. For example, # Called-Station-Id might be able to be broken - # down to Called-Station-Id.mac and Called-Station-Id.ssid - # on some wireless systems, or to .ip and .port + # down to Called-Station-Id_mac and Called-Station-Id_ssid + # on some wireless systems, or to _ip and _port # with a VPN. # Multiple calls to grok otherwise it can stop @@ -113,16 +113,16 @@ filter { # e.g. you want to pull both IP and port out of # the same field in two different regex's. 
- # Pull out some IP addresses as field.ip: + # Pull out some IP addresses as field_ip: grok { break_on_match => false tag_on_failure => [] match => [ - "Framed-IP-Address", "^(?<Framed-IP-Address.ip>\d+\.\d+\.\d+\.\d+$)", - "NAS-IP-Address", "^(?<NAS-IP-Address.ip>\d+\.\d+\.\d+\.\d+$)", - "Calling-Station-Id", "^(?<Calling-Station-Id.ip>\d+\.\d+\.\d+\.\d+)", - "Called-Station-Id", "^(?<Called-Station-Id.ip>\d+\.\d+\.\d+\.\d+)" + "Framed-IP-Address", "^(?<Framed-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)", + "NAS-IP-Address", "^(?<NAS-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)", + "Calling-Station-Id", "^(?<Calling-Station-Id_ip>\d+\.\d+\.\d+\.\d+)", + "Called-Station-Id", "^(?<Called-Station-Id_ip>\d+\.\d+\.\d+\.\d+)" ] } @@ -133,11 +133,11 @@ filter { break_on_match => false tag_on_failure => [] match => [ - "User-Name", "^(?<User-Name.username>[^@]+)?(?:@(?<User-Name.realm>[^@]+))$", - "Operator-Name", "^(?<Operator-Name.id>.)(?<Operator-Name.value>.+)$", + "User-Name", "^(?<User-Name_username>[^@]+)?(?:@(?<User-Name_realm>[^@]+))$", + "Operator-Name", "^(?<Operator-Name_id>.)(?<Operator-Name_value>.+)$", - "Calling-Station-Id", "\[(?<Calling-Station-Id.port>\d+)\]$", - "Called-Station-Id", "\[(?<Called-Station-Id.port>\d+)\]$" + "Calling-Station-Id", "\[(?<Calling-Station-Id_port>\d+)\]$", + "Called-Station-Id", "\[(?<Called-Station-Id_port>\d+)\]$" ] } @@ -149,13 +149,13 @@ break_on_match => false tag_on_failure => [] match => [ - "Calling-Station-Id", "^(?<Calling-Station-Id.mac>[a-fA-F0-9:-]{17})$", - "Calling-Station-Id", "^(?<Calling-Station-Id.mac>[a-fA-F0-9\.]{14})$", - "Calling-Station-Id", "^(?<Calling-Station-Id.mac>[a-fA-F0-9]{12})$", + "Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9:-]{17})$", + "Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9\.]{14})$", + "Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9]{12})$", - "Called-Station-Id", "^(?<Called-Station-Id.mac>[a-fA-F0-9:-]{17})(?::(?<Called-Station-Id.ssid>.*))?$", - "Called-Station-Id", "^(?<Called-Station-Id.mac>[a-fA-F0-9\.]{14})(?::(?<Called-Station-Id.ssid>.*))?$", - "Called-Station-Id", "^(?<Called-Station-Id.mac>[a-fA-F0-9]{12})(?::(?<Called-Station-Id.ssid>.*))?$" + "Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9:-]{17})(?::(?<Called-Station-Id_ssid>.*))?$", + "Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9\.]{14})(?::(?<Called-Station-Id_ssid>.*))?$", + "Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9]{12})(?::(?<Called-Station-Id_ssid>.*))?$" ] } @@ -167,8 +167,8 @@ # sanitize_mac { # match => { - # "Called-Station-Id.mac" => "Called-Station-Id.mac" - # "Calling-Station-Id.mac" => "Calling-Station-Id.mac" + # "Called-Station-Id_mac" => "Called-Station-Id_mac" + # "Calling-Station-Id_mac" => "Calling-Station-Id_mac" # } # separator => ":" # fixcase
=> "lower" @@ -182,14 +182,14 @@ filter { if ([Acct-Input-Octets]) { ruby { - code => "event['Acct-Input-Octets.long'] = + code => "event['Acct-Input-Octets_long'] = event['Acct-Input-Octets'].to_i + ( event['Acct-Input-Gigawords'] ? (event['Acct-Input-Gigawords'].to_i * (2**32)) : 0)" } } if ([Acct-Output-Octets]) { ruby { - code => "event['Acct-Output-Octets.long'] = + code => "event['Acct-Output-Octets_long'] = event['Acct-Output-Octets'].to_i + ( event['Acct-Output-Gigawords'] ? (event['Acct-Output-Gigawords'].to_i * (2**32)) : 0)" } } diff --git a/doc/schemas/logstash/radius-mapping.sh b/doc/schemas/logstash/radius-mapping.sh index 8fe8a480dd2..f3f47245dae 100755 --- a/doc/schemas/logstash/radius-mapping.sh +++ b/doc/schemas/logstash/radius-mapping.sh @@ -19,16 +19,16 @@ # Additionally, the supplied logstash config will try and extract # MAC addresses, IP addresses and ports from the data. These are -# stored as sub-fields under the respective attribute. For -# example, an attribute +# stored with suffixes on the respective attribute. For example, +# an attribute # # Called-Station-Id := "10.0.4.6[4500]" # # will be broken down into the following fields in elasticsearch: # # Called-Station-Id = "10.0.4.6[4500]" -# Called-Station-Id.ip = "10.0.4.6" -# Called-Station-Id.port = "4500" +# Called-Station-Id_ip = "10.0.4.6" +# Called-Station-Id_port = "4500" # # This mapping ensures that these have an appropriate data type. @@ -61,7 +61,7 @@ curl -XPUT '127.0.0.1:9200/_template/radius' -d ' }, { "ipv4_address": { - "path_match": "*.ip", + "path_match": "*_ip", "mapping": { "type": "ip", "doc_values": true @@ -70,7 +70,16 @@ curl -XPUT '127.0.0.1:9200/_template/radius' -d ' }, { "network_port": { - "path_match": "*.port", + "path_match": "*_port", + "mapping": { + "type": "integer", + "doc_values": true + } + } + }, + + { "long_number": { + "path_match": "*_long", "mapping": { "type": "integer", "doc_values": true -- 2.47.3