****************************************************************************
Using Raw Data Sizing and Custom Search Base

These searches use Splunk's len() eval function to get the size of the raw
event, using a custom base search for a specific type of data.
Note: len() returns the character length of _raw, so the reported sizes are
approximate when events contain multi-byte characters.
****************************************************************************

NOTE: Just replace "EventCode" and "sourcetype" with values corresponding to
the type of data that you are looking to measure.

=====================================================
Simple Searches:
=====================================================

Indexed Raw Data Size by host By Day:
-------------------------------------
sourcetype=WinEventLog:*
| fields _raw, _time, host
| eval evt_bytes = len(_raw)
| timechart span=1d sum(eval(evt_bytes/1024/1024)) AS TotalMB by host

Indexed Raw Data Size by sourcetype By Day:
-------------------------------------------
sourcetype=WinEventLog:*
| fields _raw, _time, sourcetype
| eval evt_bytes = len(_raw)
| timechart span=1d sum(eval(evt_bytes/1024/1024)) AS TotalMB by sourcetype

Indexed Raw Data Size by Windows EventCode By Day:
--------------------------------------------------
sourcetype=WinEventLog:*
| fields _raw, _time, EventCode
| eval evt_bytes = len(_raw)
| timechart span=1d limit=10 sum(eval(evt_bytes/1024/1024)) AS TotalMB by EventCode useother=false

Avg Event count/day, Avg bytes/day and Avg event size by sourcetype:
--------------------------------------------------------------------
index=_internal kb group="per_sourcetype_thruput"
| eval B = round((kb*1024),2)
| stats sum(ev) as count, sum(B) as B by series, date_mday
| eval aes = (B/count)
| stats avg(count) as AC, avg(B) as AB, avg(aes) as AES by series
| eval AB = round(AB,0)
| eval AC = round(AC,0)
| eval AES = round(AES,2)
| rename AB as "Avg bytes/day", AC as "Avg events/day", AES as "Avg event size"

Avg Event count/day, Avg bytes/day and Avg event size by source:
----------------------------------------------------------------
index=_internal kb group="per_source_thruput"
| eval B = round((kb*1024),2)
| stats sum(ev) as count, sum(B) as B by series, date_mday
| eval aes = (B/count)
| stats avg(count) as AC, avg(B) as AB, avg(aes) as AES by series
| eval AB = round(AB,0)
| eval AC = round(AC,0)
| eval AES = round(AES,2)
| rename AB as "Avg bytes/day", AC as "Avg events/day", AES as "Avg event size"

=====================================================
Combined Hosts and Sourcetypes:
=====================================================

Top 10 hosts and Top 5 sourcetypes for each host by Day:
--------------------------------------------------------
sourcetype=WinEventLog:*
| fields _raw, _time, host, sourcetype
| eval evt_bytes = len(_raw)
| eval day_period=strftime(_time, "%m/%d/%Y")
| stats sum(evt_bytes) AS TotalMB, count AS Total_Events by day_period,host,sourcetype
| sort day_period
| eval TotalMB=round(TotalMB/1024/1024,4)
| eval Total_Events_st=tostring(Total_Events,"commas")
| eval comb="| - (".round(TotalMB,2)." MB) for ".sourcetype." data"
| sort -TotalMB
| stats list(comb) AS subcomb, sum(TotalMB) AS TotalMB by host, day_period
| eval subcomb=mvindex(subcomb,0,4)
| mvcombine subcomb
| sort -TotalMB
| eval endcomb="|".host." (Total - ".round(TotalMB,2)."MB):".subcomb
| stats sum(TotalMB) AS Daily_Size_Total, list(endcomb) AS Details by day_period
| eval Daily_Size_Total=round(Daily_Size_Total,2)
| eval Details=mvindex(Details,0,9)
| makemv delim="|" Details
| sort -day_period

Top 10 Hosts and Top 5 Windows Event IDs by Day:
--------------------------------------------------------
sourcetype=WinEventLog:*
| fields _raw, _time, host, EventCode
| eval evt_bytes = len(_raw)
| eval day_period=strftime(_time, "%m/%d/%Y")
| stats sum(evt_bytes) AS TotalMB, count AS Total_Events by day_period,host,EventCode
| sort day_period
| eval TotalMB=round(TotalMB/1024/1024,4)
| eval Total_Events_st=tostring(Total_Events,"commas")
| eval comb="| - (".round(TotalMB,2)." MB) for EventID- ".EventCode." data"
| sort -TotalMB
| stats list(comb) AS subcomb, sum(TotalMB) AS TotalMB by host, day_period
| eval subcomb=mvindex(subcomb,0,4)
| mvcombine subcomb
| sort -TotalMB
| eval endcomb="|".host." (Total - ".round(TotalMB,2)."MB):".subcomb
| stats sum(TotalMB) AS Daily_Size_Total, list(endcomb) AS Details by day_period
| eval Daily_Size_Total=round(Daily_Size_Total,2)
| eval Details=mvindex(Details,0,9)
| makemv delim="|" Details
| sort -day_period

**************************************************************************
Licensing/Storage Metrics Source

The searches below look at the internally collected licensing/metrics logs
and the introspection index. The licensing data is in license_usage.log,
which is indexed into the _internal index; the storage searches read the
_introspection index.
**************************************************************************

============================================
Splunk Index License Size Analysis
============================================

Percent used by each index:
---------------------------
index=_internal source=*license_usage.log type=Usage
| fields idx, b
| rename idx AS index_name
| stats sum(eval(b/1024/1024)) as Total_MB by index_name
| eventstats sum(Total_MB) as Overall_Total_MB
| sort -Total_MB
| eval Percent_Of_Total=round(Total_MB/Overall_Total_MB*100,2)."%"
| eval Total_MB = tostring(round(Total_MB,2),"commas")
| eval Overall_Total_MB = tostring(round(Overall_Total_MB,2),"commas")
| table index_name, Percent_Of_Total, Total_MB, Overall_Total_MB

Total MB by index, Day – Timechart:
-----------------------------------
index=_internal source=*license_usage.log type=Usage
| fields idx, b
| rename idx as index_name
| timechart span=1d limit=20 sum(eval(round(b/1024/1024,4))) AS Total_MB by index_name

============================================
Splunk Sourcetype License Size Analysis
============================================

Percent used by each sourcetype:
-------------------------------------------
index=_internal source=*license_usage.log type=Usage
| fields st, b
| rename st AS sourcetype_name
| stats sum(eval(b/1024/1024)) as Total_MB by sourcetype_name
| eventstats sum(Total_MB) as Overall_Total_MB
| sort -Total_MB
| eval Percent_Of_Total=round(Total_MB/Overall_Total_MB*100,2)."%"
| eval Total_MB = tostring(round(Total_MB,2),"commas")
| eval Overall_Total_MB = tostring(round(Overall_Total_MB,2),"commas")
| table sourcetype_name, Percent_Of_Total, Total_MB, Overall_Total_MB

Total MB by sourcetype, Day – Timechart:
-------------------------------------------
index=_internal source=*license_usage.log type=Usage
| fields st, b
| rename st as sourcetype_name
| timechart span=1d limit=20 sum(eval(round(b/1024/1024,4))) AS Total_MB by sourcetype_name
============================================
Splunk host License Size Analysis
============================================

Percent used by each host:
-------------------------------------------
index=_internal source=*license_usage.log type=Usage
| fields h, b
| rename h AS host_name
| stats sum(eval(b/1024/1024)) as Total_MB by host_name
| eventstats sum(Total_MB) as Overall_Total_MB
| sort -Total_MB
| eval Percent_Of_Total=round(Total_MB/Overall_Total_MB*100,2)."%"
| eval Total_MB = tostring(round(Total_MB,2),"commas")
| eval Overall_Total_MB = tostring(round(Overall_Total_MB,2),"commas")
| table host_name, Percent_Of_Total, Total_MB, Overall_Total_MB

Total MB by host, Day – Timechart:
-------------------------------------------
index=_internal source=*license_usage.log type=Usage
| fields h, b
| rename h as host_name
| timechart span=1d limit=20 sum(eval(round(b/1024/1024,4))) AS Total_MB by host_name

============================================
Splunk Index Storage Size Analysis
============================================

Storage Size used by each non-internal index:
-------------------------------------------
index=_introspection component=Indexes NOT(data.name="Value*" OR data.name="summary" OR data.name="_*")
| eval data.total_size = 'data.total_size' / 1024
| timechart span=1d limit=10 max("data.total_size") by data.name

Storage Size used by each internal index:
-------------------------------------------
index=_introspection component=Indexes (data.name="Value*" OR data.name="summary" OR data.name="_*")
| eval data.total_size = 'data.total_size' / 1024
| timechart span=1d limit=10 max("data.total_size") by data.name