Nathanwoodburn.github.io/libraries/plugins/IDS/Converter.php

751 lines
23 KiB
PHP
Raw Normal View History

2021-08-30 15:06:36 +10:00
<?php
/**
* PHPIDS
*
* Requirements: PHP5, SimpleXML
*
* Copyright (c) 2008 PHPIDS group (https://phpids.org)
*
* PHPIDS is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License, or
* (at your option) any later version.
*
* PHPIDS is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with PHPIDS. If not, see <http://www.gnu.org/licenses/>.
*
* PHP version 5.1.6+
*
* @category Security
* @package PHPIDS
* @author Mario Heiderich <mario.heiderich@gmail.com>
* @author Christian Matthies <ch0012@gmail.com>
* @author Lars Strojny <lars@strojny.net>
* @license http://www.gnu.org/licenses/lgpl.html LGPL
* @link http://php-ids.org/
*/
/**
* PHPIDS specific utility class to convert charsets manually
*
* Note that if you make use of IDS_Converter::runAll(), existing class
* methods will be executed in the same order as they are implemented in the
* class tree!
*
* @category Security
* @package PHPIDS
* @author Christian Matthies <ch0012@gmail.com>
* @author Mario Heiderich <mario.heiderich@gmail.com>
* @author Lars Strojny <lars@strojny.net>
* @copyright 2007-2009 The PHPIDS Group
* @license http://www.gnu.org/licenses/lgpl.html LGPL
* @version Release: $Id:Converter.php 517 2007-09-15 15:04:13Z mario $
* @link http://php-ids.org/
*/
class IDS_Converter
{
/**
* Runs all converter functions
*
* Note that if you make use of IDS_Converter::runAll(), existing class
* methods will be executed in the same order as they are implemented in the
* class tree!
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function runAll($value)
{
foreach (get_class_methods(__CLASS__) as $method) {
if (strpos($method, 'run') === 0) {
continue;
}
$value = self::$method($value);
}
return $value;
}
/**
* Check for comments and erases them if available
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromCommented($value)
{
// check for existing comments
if (preg_match('/(?:\<!-|-->|\/\*|\*\/|\/\/\W*\w+\s*$)|' .
'(?:--[^-]*-)/ms', $value)) {
$pattern = array(
'/(?:(?:<!)(?:(?:--(?:[^-]*(?:-[^-]+)*)--\s*)*)(?:>))/ms',
'/(?:(?:\/\*\/*[^\/\*]*)+\*\/)/ms',
'/(?:--[^-]*-)/ms'
);
$converted = preg_replace($pattern, ';', $value);
$value .= "\n" . $converted;
}
//make sure inline comments are detected and converted correctly
$value = preg_replace('/(<\w+)\/+(\w+=?)/m', '$1/$2', $value);
$value = preg_replace('/[^\\\:]\/\/(.*)$/m', '/**/$1', $value);
$value = preg_replace('/([^\-&])#.*[\r\n\v\f]/m', '$1', $value);
$value = preg_replace('/([^&\-])#.*\n/m', '$1 ', $value);
$value = preg_replace('/^#.*\n/m', ' ', $value);
return $value;
}
/**
* Strip newlines
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromWhiteSpace($value)
{
//check for inline linebreaks
$search = array('\r', '\n', '\f', '\t', '\v');
$value = str_replace($search, ';', $value);
// replace replacement characters regular spaces
$value = str_replace('<27>', ' ', $value);
//convert real linebreaks
return preg_replace('/(?:\n|\r|\v)/m', ' ', $value);
}
/**
* Checks for common charcode pattern and decodes them
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromJSCharcode($value)
{
$matches = array();
// check if value matches typical charCode pattern
if (preg_match_all('/(?:[\d+-=\/\* ]+(?:\s?,\s?[\d+-=\/\* ]+)){4,}/ms',
$value, $matches)) {
$converted = '';
$string = implode(',', $matches[0]);
$string = preg_replace('/\s/', '', $string);
$string = preg_replace('/\w+=/', '', $string);
$charcode = explode(',', $string);
foreach ($charcode as $char) {
$char = preg_replace('/\W0/s', '', $char);
if (preg_match_all('/\d*[+-\/\* ]\d+/', $char, $matches)) {
$match = preg_split('/(\W?\d+)/',
(implode('', $matches[0])),
null,
PREG_SPLIT_DELIM_CAPTURE);
if (array_sum($match) >= 20 && array_sum($match) <= 127) {
$converted .= chr(array_sum($match));
}
} elseif (!empty($char) && $char >= 20 && $char <= 127) {
$converted .= chr($char);
}
}
$value .= "\n" . $converted;
}
// check for octal charcode pattern
if (preg_match_all('/(?:(?:[\\\]+\d+[ \t]*){8,})/ims', $value, $matches)) {
$converted = '';
$charcode = explode('\\', preg_replace('/\s/', '', implode(',',
$matches[0])));
foreach ($charcode as $char) {
if (!empty($char)) {
if (octdec($char) >= 20 && octdec($char) <= 127) {
$converted .= chr(octdec($char));
}
}
}
$value .= "\n" . $converted;
}
// check for hexadecimal charcode pattern
if (preg_match_all('/(?:(?:[\\\]+\w+\s*){8,})/ims', $value, $matches)) {
$converted = '';
$charcode = explode('\\', preg_replace('/[ux]/', '', implode(',',
$matches[0])));
foreach ($charcode as $char) {
if (!empty($char)) {
if (hexdec($char) >= 20 && hexdec($char) <= 127) {
$converted .= chr(hexdec($char));
}
}
}
$value .= "\n" . $converted;
}
return $value;
}
/**
* Eliminate JS regex modifiers
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertJSRegexModifiers($value)
{
$value = preg_replace('/\/[gim]+/', '/', $value);
return $value;
}
/**
* Converts from hex/dec entities
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertEntities($value)
{
$converted = null;
//deal with double encoded payload
$value = preg_replace('/&amp;/', '&', $value);
if (preg_match('/&#x?[\w]+/ms', $value)) {
$converted = preg_replace('/(&#x?[\w]{2}\d?);?/ms', '$1;', $value);
$converted = html_entity_decode($converted, ENT_QUOTES, 'UTF-8');
$value .= "\n" . str_replace(';;', ';', $converted);
}
// normalize obfuscated protocol handlers
$value = preg_replace(
'/(?:j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:)|(d\s*a\s*t\s*a\s*:)/ms',
'javascript:', $value
);
return $value;
}
/**
* Normalize quotes
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertQuotes($value)
{
// normalize different quotes to "
$pattern = array('\'', '`', '´', '', '');
$value = str_replace($pattern, '"', $value);
//make sure harmless quoted strings don't generate false alerts
$value = preg_replace('/^"([^"=\\!><~]+)"$/', '$1', $value);
return $value;
}
/**
* Converts SQLHEX to plain text
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromSQLHex($value)
{
$matches = array();
if(preg_match_all('/(?:(?:\A|[^\d])0x[a-f\d]{3,}[a-f\d]*)+/im', $value, $matches)) {
foreach($matches[0] as $match) {
$converted = '';
foreach(str_split($match, 2) as $hex_index) {
if(preg_match('/[a-f\d]{2,3}/i', $hex_index)) {
$converted .= chr(hexdec($hex_index));
}
}
$value = str_replace($match, $converted, $value);
}
}
// take care of hex encoded ctrl chars
$value = preg_replace('/0x\d+/m', ' 1 ', $value);
return $value;
}
/**
* Converts basic SQL keywords and obfuscations
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromSQLKeywords($value)
{
$pattern = array('/(?:is\s+null)|(like\s+null)|' .
'(?:(?:^|\W)in[+\s]*\([\s\d"]+[^()]*\))/ims');
$value = preg_replace($pattern, '"=0', $value);
$value = preg_replace('/[^\w\)]+\s*like\s*[^\w\s]+/ims', '1" OR "1"', $value);
$value = preg_replace('/null([,"\s])/ims', '0$1', $value);
$value = preg_replace('/\d+\./ims', ' 1', $value);
$value = preg_replace('/,null/ims', ',0', $value);
$value = preg_replace('/(?:between)/ims', 'or', $value);
$value = preg_replace('/(?:and\s+\d+\.?\d*)/ims', '', $value);
$value = preg_replace('/(?:\s+and\s+)/ims', ' or ', $value);
$pattern = array('/(?:not\s+between)|(?:is\s+not)|(?:not\s+in)|' .
'(?:xor|<>|rlike(?:\s+binary)?)|' .
'(?:regexp\s+binary)|' .
'(?:sounds\s+like)/ims');
$value = preg_replace($pattern, '!', $value);
$value = preg_replace('/"\s+\d/', '"', $value);
$value = preg_replace('/(\W)div(\W)/ims', '$1 OR $2', $value);
$value = preg_replace('/\/(?:\d+|null)/', null, $value);
return $value;
}
/**
* Detects nullbytes and controls chars via ord()
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromControlChars($value)
{
// critical ctrl values
$search = array(
chr(0), chr(1), chr(2), chr(3), chr(4), chr(5),
chr(6), chr(7), chr(8), chr(11), chr(12), chr(14),
chr(15), chr(16), chr(17), chr(18), chr(19), chr(24),
chr(25), chr(192), chr(193), chr(238), chr(255), '\\0'
);
$value = str_replace($search, '%00', $value);
//take care for malicious unicode characters
$value = urldecode(preg_replace('/(?:%E(?:2|3)%8(?:0|1)%(?:A|8|9)' .
'\w|%EF%BB%BF|%EF%BF%BD)|(?:&#(?:65|8)\d{3};?)/i', null,
urlencode($value)));
$value = urldecode(
preg_replace('/(?:%F0%80%BE)/i', '>', urlencode($value)));
$value = urldecode(
preg_replace('/(?:%F0%80%BC)/i', '<', urlencode($value)));
$value = urldecode(
preg_replace('/(?:%F0%80%A2)/i', '"', urlencode($value)));
$value = urldecode(
preg_replace('/(?:%F0%80%A7)/i', '\'', urlencode($value)));
$value = preg_replace('/(?:%ff1c)/', '<', $value);
$value = preg_replace(
'/(?:&[#x]*(200|820|200|820|zwn?j|lrm|rlm)\w?;?)/i', null,$value
);
$value = preg_replace('/(?:&#(?:65|8)\d{3};?)|' .
'(?:&#(?:56|7)3\d{2};?)|' .
'(?:&#x(?:fe|20)\w{2};?)|' .
'(?:&#x(?:d[c-f])\w{2};?)/i', null,
$value);
$value = str_replace(
array('«', '〈', '', '', '〈', '⟨'), '<', $value
);
$value = str_replace(
array('»', '〉', '', '', '〉', '⟩'), '>', $value
);
return $value;
}
/**
* This method matches and translates base64 strings and fragments
* used in data URIs
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromNestedBase64($value)
{
$matches = array();
preg_match_all('/(?:^|[,&?])\s*([a-z0-9]{30,}=*)(?:\W|$)/im',
$value,
$matches);
foreach ($matches[1] as $item) {
if (isset($item) && !preg_match('/[a-f0-9]{32}/i', $item)) {
$base64_item = base64_decode($item);
$value = str_replace($item, $base64_item, $value);
}
}
return $value;
}
/**
* Detects nullbytes and controls chars via ord()
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromOutOfRangeChars($value)
{
$values = str_split($value);
foreach ($values as $item) {
if (ord($item) >= 127) {
$value = str_replace($item, ' ', $value);
}
}
return $value;
}
/**
* Strip XML patterns
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromXML($value)
{
$converted = strip_tags($value);
if ($converted && ($converted != $value)) {
return $value . "\n" . $converted;
}
return $value;
}
/**
* This method converts JS unicode code points to
* regular characters
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromJSUnicode($value)
{
$matches = array();
preg_match_all('/\\\u[0-9a-f]{4}/ims', $value, $matches);
if (!empty($matches[0])) {
foreach ($matches[0] as $match) {
$chr = chr(hexdec(substr($match, 2, 4)));
$value = str_replace($match, $chr, $value);
}
$value .= "\n\u0001";
}
return $value;
}
/**
* Converts relevant UTF-7 tags to UTF-8
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromUTF7($value)
{
if(preg_match('/\+A\w+-?/m', $value)) {
if (function_exists('mb_convert_encoding')) {
if(version_compare(PHP_VERSION, '5.2.8', '<')) {
$tmp_chars = str_split($value);
$value = '';
foreach($tmp_chars as $char) {
if(ord($char) <= 127) {
$value .= $char;
}
}
}
$value .= "\n" . mb_convert_encoding($value, 'UTF-8', 'UTF-7');
} else {
//list of all critical UTF7 codepoints
$schemes = array(
'+ACI-' => '"',
'+ADw-' => '<',
'+AD4-' => '>',
'+AFs-' => '[',
'+AF0-' => ']',
'+AHs-' => '{',
'+AH0-' => '}',
'+AFw-' => '\\',
'+ADs-' => ';',
'+ACM-' => '#',
'+ACY-' => '&',
'+ACU-' => '%',
'+ACQ-' => '$',
'+AD0-' => '=',
'+AGA-' => '`',
'+ALQ-' => '"',
'+IBg-' => '"',
'+IBk-' => '"',
'+AHw-' => '|',
'+ACo-' => '*',
'+AF4-' => '^',
'+ACIAPg-' => '">',
'+ACIAPgA8-' => '">'
);
$value = str_ireplace(array_keys($schemes),
array_values($schemes), $value);
}
}
return $value;
}
/**
* Converts basic concatenations
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromConcatenated($value)
{
//normalize remaining backslashes
if ($value != preg_replace('/(\w)\\\/', "$1", $value)) {
$value .= preg_replace('/(\w)\\\/', "$1", $value);
}
$compare = stripslashes($value);
$pattern = array('/(?:<\/\w+>\+<\w+>)/s',
'/(?:":\d+[^"[]+")/s',
'/(?:"?"\+\w+\+")/s',
'/(?:"\s*;[^"]+")|(?:";[^"]+:\s*")/s',
'/(?:"\s*(?:;|\+).{8,18}:\s*")/s',
'/(?:";\w+=)|(?:!""&&")|(?:~)/s',
'/(?:"?"\+""?\+?"?)|(?:;\w+=")|(?:"[|&]{2,})/s',
'/(?:"\s*\W+")/s',
'/(?:";\w\s*\+=\s*\w?\s*")/s',
'/(?:"[|&;]+\s*[^|&\n]*[|&]+\s*"?)/s',
'/(?:";\s*\w+\W+\w*\s*[|&]*")/s',
'/(?:"\s*"\s*\.)/s',
'/(?:\s*new\s+\w+\s*[+",])/',
'/(?:(?:^|\s+)(?:do|else)\s+)/',
'/(?:[{(]\s*new\s+\w+\s*[)}])/',
'/(?:(this|self)\.)/',
'/(?:undefined)/',
'/(?:in\s+)/');
// strip out concatenations
$converted = preg_replace($pattern, null, $compare);
//strip object traversal
$converted = preg_replace('/\w(\.\w\()/', "$1", $converted);
// normalize obfuscated method calls
$converted = preg_replace('/\)\s*\+/', ")", $converted);
//convert JS special numbers
$converted = preg_replace('/(?:\(*[.\d]e[+-]*[^a-z\W]+\)*)' .
'|(?:NaN|Infinity)\W/ims', 1, $converted);
if ($converted && ($compare != $converted)) {
$value .= "\n" . $converted;
}
return $value;
}
/**
* This method collects and decodes proprietary encoding types
*
* @param string $value the value to convert
*
* @static
* @return string
*/
public static function convertFromProprietaryEncodings($value) {
//Xajax error reportings
$value = preg_replace('/<!\[CDATA\[(\W+)\]\]>/im', '$1', $value);
//strip false alert triggering apostrophes
$value = preg_replace('/(\w)\"(s)/m', '$1$2', $value);
//strip quotes within typical search patterns
$value = preg_replace('/^"([^"=\\!><~]+)"$/', '$1', $value);
//OpenID login tokens
$value = preg_replace('/{[\w-]{8,9}\}(?:\{[\w=]{8}\}){2}/', null, $value);
//convert Content and \sdo\s to null
$value = preg_replace('/Content|\Wdo\s/', null, $value);
//strip emoticons
$value = preg_replace(
'/(?:\s[:;]-[)\/PD]+)|(?:\s;[)PD]+)|(?:\s:[)PD]+)|-\.-|\^\^/m',
null,
$value
);
//normalize separation char repetion
$value = preg_replace('/([.+~=*_\-;])\1{2,}/m', '$1', $value);
//normalize multiple single quotes
$value = preg_replace('/"{2,}/m', '"', $value);
//normalize quoted numerical values and asterisks
$value = preg_replace('/"(\d+)"/m', '$1', $value);
//normalize pipe separated request parameters
$value = preg_replace('/\|(\w+=\w+)/m', '&$1', $value);
//normalize ampersand listings
$value = preg_replace('/(\w\s)&\s(\w)/', '$1$2', $value);
//normalize escaped RegExp modifiers
$value = preg_replace('/\/\\\(\w)/', '/$1', $value);
return $value;
}
/**
* This method is the centrifuge prototype
*
* @param string $value the value to convert
* @param IDS_Monitor $monitor the monitor object
*
* @static
* @return string
*/
public static function runCentrifuge($value, IDS_Monitor $monitor = null)
{
$threshold = 3.49;
if (strlen($value) > 25) {
//strip padding
$tmp_value = preg_replace('/\s{4}|==$/m', null, $value);
$tmp_value = preg_replace(
'/\s{4}|[\p{L}\d\+\-=,.%()]{8,}/m',
'aaa',
$tmp_value
);
// Check for the attack char ratio
$tmp_value = preg_replace('/([*.!?+-])\1{1,}/m', '$1', $tmp_value);
$tmp_value = preg_replace('/"[\p{L}\d\s]+"/m', null, $tmp_value);
$stripped_length = strlen(preg_replace('/[\d\s\p{L}\.:,%&\/><\-)!|]+/m',
null, $tmp_value));
$overall_length = strlen(
preg_replace('/([\d\s\p{L}:,\.]{3,})+/m', 'aaa',
preg_replace('/\s{2,}/m', null, $tmp_value))
);
if ($stripped_length != 0
&& $overall_length/$stripped_length <= $threshold) {
$monitor->centrifuge['ratio'] =
$overall_length/$stripped_length;
$monitor->centrifuge['threshold'] =
$threshold;
$value .= "\n$[!!!]";
}
}
if (strlen($value) > 40) {
// Replace all non-special chars
$converted = preg_replace('/[\w\s\p{L},.:!]/', null, $value);
// Split string into an array, unify and sort
$array = str_split($converted);
$array = array_unique($array);
asort($array);
// Normalize certain tokens
$schemes = array(
'~' => '+',
'^' => '+',
'|' => '+',
'*' => '+',
'%' => '+',
'&' => '+',
'/' => '+'
);
$converted = implode($array);
$_keys = array_keys($schemes);
$_values = array_values($schemes);
$converted = str_replace($_keys, $_values, $converted);
$converted = preg_replace('/[+-]\s*\d+/', '+', $converted);
$converted = preg_replace('/[()[\]{}]/', '(', $converted);
$converted = preg_replace('/[!?:=]/', ':', $converted);
$converted = preg_replace('/[^:(+]/', null, stripslashes($converted));
// Sort again and implode
$array = str_split($converted);
asort($array);
$converted = implode($array);
if (preg_match('/(?:\({2,}\+{2,}:{2,})|(?:\({2,}\+{2,}:+)|' .
'(?:\({3,}\++:{2,})/', $converted)) {
$monitor->centrifuge['converted'] = $converted;
return $value . "\n" . $converted;
}
}
return $value;
}
}
/**
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: sw=4 ts=4 expandtab
*/