diff -ruN awffull-3.10.2.orig/doc/awffull.conf.5 awffull-3.10.2/doc/awffull.conf.5
--- awffull-3.10.2.orig/doc/awffull.conf.5	2008-12-12 19:25:09.000000000 -0700
+++ awffull-3.10.2/doc/awffull.conf.5	2009-07-31 12:22:39.000000000 -0600
@@ -239,6 +239,18 @@
 produces the greatest amount of detail.
 User agent names that can't be mangled will be left unmodified.
 .TP
+MangleReferrers
+The MangleReferrers keyword allows you to specify how much, if any, AWFFull
+should mangle referrers. This allows several levels of detail
+to be produced when reporting referrer statistics.
+The default Level 0 displays the
+full referrer field without modification and produces the greatest
+amount of detail.
+Level 1 will only show the host/domain name.
+Level 2 will also show the first path component.
+Level 3 will show the first two path components,
+and so on.
+.TP
 AssignToCountry
 AssignToCountry allows a form of override to force given domains
 to a specified country. Use the standard 2 letter country
diff -ruN awffull-3.10.2.orig/doc/awffull.conf.5.xml awffull-3.10.2/doc/awffull.conf.5.xml
--- awffull-3.10.2.orig/doc/awffull.conf.5.xml	2008-12-12 19:24:13.000000000 -0700
+++ awffull-3.10.2/doc/awffull.conf.5.xml	2009-07-31 12:24:33.000000000 -0600
@@ -369,6 +369,22 @@
         </listitem>
       </varlistentry>
       <varlistentry>
+        <term>MangleReferrers</term>
+        <listitem>
+          <para>
+          The MangleReferrers keyword allows you to specify how much, if
+          any, AWFFull should mangle referrers. This allows several levels of detail
+          to be produced when reporting referrer statistics.
+          The default Level 0 displays the full referrer field without modification
+          and produces the greatest amount of detail.
+          Level 1 will only show the host/domain name.
+          Level 2 will also show the first path component.
+          Level 3 will show the first two path components,
+          and so on.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
         <term>AssignToCountry</term>
         <listitem>
           <para>
diff -ruN awffull-3.10.2.orig/README.webalizer awffull-3.10.2/README.webalizer
--- awffull-3.10.2.orig/README.webalizer	2008-12-12 18:31:17.000000000 -0700
+++ awffull-3.10.2/README.webalizer	2009-07-31 12:18:51.000000000 -0600
@@ -828,6 +828,24 @@
                 greatest amount of detail.
                 Command line argument: -M
 
+MangleReferrers The MangleReferrers keyword allows you to specify how much, if any, AWFFull
+                should mangle referrers. This allows several levels of detail
+                to be produced when reporting referrer statistics.
+                The default Level 0 displays the
+                full referrer field without modification and produces the greatest
+                amount of detail.
+                Level 1 will only show the host/domain name.
+                Level 2 will also show the first path component.
+                Level 3 will show the first two path components,
+                and so on.
+                Example:
+                Original Referrer:          http://domain.com/path1/path2/path3/path4.html
+                Mangled Referrer w/Level 1: http://domain.com
+                Mangled Referrer w/Level 2: http://domain.com/path1
+                Mangled Referrer w/Level 3: http://domain.com/path1/path2
+                Mangled Referrer w/Level 4: http://domain.com/path1/path2/path3
+                Mangled Referrer w/Level 5: http://domain.com/path1/path2/path3/path4.html
+
 SearchEngine    This keyword allows specification of search engines and
                 their query strings. Search strings are obtained from
                 the referrer field in the record, and in order to work
diff -ruN awffull-3.10.2.orig/sample.conf awffull-3.10.2/sample.conf
--- awffull-3.10.2.orig/sample.conf	2008-12-12 18:31:17.000000000 -0700
+++ awffull-3.10.2/sample.conf	2009-07-31 12:15:08.000000000 -0600
@@ -713,6 +713,26 @@
 
 #MangleAgents 0
 
+# The MangleReferrers keyword allows you to specify how much, if any, AWFFull
+# should mangle referrers. This allows several levels of detail
+# to be produced when reporting referrer statistics.
+# The default Level 0 displays the
+# full referrer field without modification and produces the greatest
+# amount of detail.
+# Level 1 will only show the host/domain name.
+# Level 2 will also show the first path component.
+# Level 3 will show the first two path components,
+# and so on.
+# Example:
+# Original Referrer:          http://domain.com/path1/path2/path3/path4.html
+# Mangled Referrer w/Level 1: http://domain.com
+# Mangled Referrer w/Level 2: http://domain.com/path1
+# Mangled Referrer w/Level 3: http://domain.com/path1/path2
+# Mangled Referrer w/Level 4: http://domain.com/path1/path2/path3
+# Mangled Referrer w/Level 5: http://domain.com/path1/path2/path3/path4.html
+
+#MangleReferrers 0
+
 # The SearchEngine keywords allow specification of search engines and
 # their query strings on the URL. These are used to locate and report
 # what search strings are used to find your site. The first word is
diff -ruN awffull-3.10.2.orig/src/awffull.c awffull-3.10.2/src/awffull.c
--- awffull-3.10.2.orig/src/awffull.c	2008-12-12 19:28:35.000000000 -0700
+++ awffull-3.10.2/src/awffull.c	2009-07-31 12:32:47.000000000 -0600
@@ -49,6 +49,7 @@
 char *get_domain(char *);                       /* return domain name */
 char *our_gzgets(gzFile, char *, int);          /* our gzgets */
 int do_agent_mangling(char *);
+int do_referrer_mangling(char *);
 void option_checks(void);                       /* Various early checks */
 void *process_log_line(void *);
 bool isaffirmitive(char *);                     /* Is the passed in string == to Y | y | N | n | Yes or No
@@ -956,6 +957,7 @@
         "segcountry",                           /* Segmenting by Country 136 */
         "segreferer",                           /* Segmenting by Referer 137 */
-        "ignoreindexalias"                      /* Ignore Index Alias Settings 138 */
+        "ignoreindexalias",                     /* Ignore Index Alias Settings 138 */
+        "manglereferrers",                      /* Mangle Referrers 139 */
     };
 
     FILE *fp;
@@ -1485,6 +1487,9 @@
             case 138:
                 g_settings.flags.ignore_index_alias = isaffirmitive(value);
                 break;                          /* IgnoreIndexAlias */
+            case 139:
+                g_settings.settings.mangle_referrer = atoi(value);
+                break;                          /* MangleReferrers */
             }
         }
         if ((page_type != NULL) && (not_page_type != NULL)) {
@@ -2111,6 +2116,46 @@
 }
 
 
+/******************************************************************************
+ * do_referrer_mangling                                                       *
+ *                                                                            *
+ * Truncates a referrer URL in place, keeping the scheme, the host and the    *
+ * first (MangleReferrers - 1) path components.                               *
+ *                                                                            *
+ * Arguments:                                                                 *
+ * char *referrer   The referrer to reduce. It is modified in place.          *
+ *                                                                            *
+ * Returns:                                                                   *
+ * int. Always 0; a referrer with too few '/' is left untouched.              *
+ *                                                                            *
+ * TODO: Do this nicer in PCRE or equiv.                                      *
+ ******************************************************************************/
+int
+do_referrer_mangling(char *referrer)
+{
+    unsigned int level = g_settings.settings.mangle_referrer;
+    unsigned int slashes = 0;           /* '/' separators seen so far */
+    char *cp;
+
+    if (referrer == NULL || level == 0) {
+        return (0);
+    }
+
+    /* The two slashes of "scheme://" are counted as well, so level N cuts
+     * at separator number N + 2. Written as a subtraction so that an
+     * oversized level cannot wrap around. */
+    for (cp = strchr(referrer, '/'); cp != NULL; cp = strchr(cp + 1, '/')) {
+        slashes++;
+        if (slashes > 2 && slashes - 2 == level) {
+            *cp = '\0';
+            break;
+        }
+    }
+
+    return (0);
+}
+
+
 /************************************************************************
  * response_code_index                                                  *
  *                                                                      *
@@ -2392,6 +2437,11 @@
         /* unescape referrer */
         unescape(refer);
 
+        /* Do we need to mangle? */
+        if (g_settings.settings.mangle_referrer) {
+            do_referrer_mangling(refer);
+        }
+
         /* fix referrer field */
         cp1 = refer;
         cp2 = refer;
diff -ruN awffull-3.10.2.orig/src/awffull.h awffull-3.10.2/src/awffull.h
--- awffull-3.10.2.orig/src/awffull.h	2008-12-12 19:28:35.000000000 -0700
+++ awffull-3.10.2/src/awffull.h	2009-07-31 12:04:44.000000000 -0600
@@ -333,6 +333,7 @@
 struct generic_settings {
     unsigned int index_months;                  /* Number of Months to display */
     unsigned int mangle_agent;                  /* mangle user agents */
+    unsigned int mangle_referrer;               /* mangle referrers */
     unsigned int visit_timeout;                 /* visit timeout - units in seconds */
     unsigned int log_type;                      /* Log Type. See #define LOG_* above */
     unsigned int group_domains;                 /* Group domains 0=none */
diff -ruN awffull-3.10.2.orig/src/options.c awffull-3.10.2/src/options.c
--- awffull-3.10.2.orig/src/options.c	2008-12-12 19:28:35.000000000 -0700
+++ awffull-3.10.2/src/options.c	2009-07-31 12:09:53.000000000 -0600
@@ -392,6 +392,7 @@
 
     g_settings.settings.index_months = MAXHISTLEN;
     g_settings.settings.mangle_agent = 0;
+    g_settings.settings.mangle_referrer = 0;
     g_settings.settings.visit_timeout = 1800;
     g_settings.settings.log_type = LOG_AUTO;
     g_settings.settings.group_domains = 0;
@@ -512,6 +513,7 @@
     VPRINT(0, "g_settings.dump.dump_ext: %s\n", g_settings.dump.dump_ext);
 
     VPRINT(0, "g_settings.settings.mangle_agent: %u\n", g_settings.settings.mangle_agent);
+    VPRINT(0, "g_settings.settings.mangle_referrer: %u\n", g_settings.settings.mangle_referrer);
     VPRINT(0, "g_settings.settings.visit_timeout: %u\n", g_settings.settings.visit_timeout);
     VPRINT(0, "g_settings.settings.log_type: %u\n", g_settings.settings.log_type);
     VPRINT(0, "g_settings.settings.group_domains: %u\n", g_settings.settings.group_domains);