web全局速率限制,实现url白名单,url黑名单,host白名单,rules等过滤规则,仅供参考。
使用 cache 记录请求数据,并据此进行判断。
<?php

/**
 * Global web rate limiter backed by the application cache.
 *
 * Per client IP, every matching request is counted against a list of rules
 * (see self::$rules). When a rule's counter reaches its limit inside the
 * rule's time window, a separate "lock" cache entry is written for that
 * IP + rule; check() reports that lock to the caller.
 *
 * Filtering performed before counting (see addup()):
 *   - URL path blacklist : always counted, overrides the whitelist
 *   - URL path whitelist : never counted
 *   - host whitelist     : reverse-DNS names of known crawlers, never counted
 *
 * The functional API (addup() / check()) is static. The singleton scaffolding
 * (getInstance() and the private constructor) is kept so existing callers of
 * getInstance() keep working.
 *
 * NOTE: a comment in the rule table states production runs PHP 5.5, so this
 * class deliberately avoids syntax newer than that.
 */
class ratelimit
{
    /**
     * @var static
     */
    private static $instance;

    const CACHE_RATELIMIT_PREFIX = 'c_rl_';        // cache_ratelimit
    const CACHE_RATELIMIT_LOCK_PREFIX = 'c_rl_l_'; // cache_ratelimit_lock_

    /**
     * URL path blacklist. These paths can never be exempted by the whitelist;
     * mostly concrete URLs.
     */
    private static $black_ops = array(
        array("string", '/customer/ajax/login'),
        array("string", '/customer/signup/sent_code_email'),
        array("string", '/customer/ajax/signup'),
        array("string", '/customer/ajax/sendSms'),
        array("string", '/product/documents/down'),
        array("string", '/gRNA-detail/search'),
        array("string", '/location.php'),
    );

    /**
     * URL path whitelist: matching requests are not counted at all.
     */
    private static $white_ops = array(
        array("regx", '/^\/api\/.*/'),
        array("regx", '/^\/quick_order\/.*/'),
        array("regx", '/^\/customer\/.*/'),
        array("string", '/ajax/formhash'),
        array("string", '/customer/cart/count'),
        array("string", '/customer/ajax/get_state_by_country'),
    );

    /**
     * Host whitelist (search-engine crawlers). Entries are substrings looked
     * up in the first output line of the `host <ip>` command.
     */
    private static $white_hosts = array(
        '.googlebot.com.',
        '.google.com.',
        '.googleusercontent.com.', // Google cache
        '.crawl.baidu.com.',
        '.applebot.apple.com.',
        '.search.msn.com.',
    );

    /**
     * Rate-limit rules. Keep the list short (even Cloudflare only offers 15).
     *
     * Per rule:
     *   name         key under which the counter is stored in the cache data
     *   url_path     exact path, or a regex when it starts and ends with '/'
     *   methods      HTTP methods the rule applies to
     *   isajax       true / false to require (non-)ajax, null to ignore
     *   second_limit length of the counting window in seconds
     *   count_limit  number of hits inside the window that triggers a lock
     *   second_wait  base lock duration in seconds (multiplied by lock count)
     */
    private static $rules = array(
        array(
            'name' => 'pageview', // page views
            'url_path' => '/.*/',
            'methods' => array('GET'),
            'isajax' => false,
            // Guess: 100 hits in 3 minutes filters scrapers/attacks better
            // than 30 hits in 1 minute.
            'second_limit' => 180,
            'count_limit' => 100,
            // NOTE: production PHP 5.5 does not support writing 5 * 60 here.
            'second_wait' => 300,
        ),
        array(
            'name' => 'postform', // non-ajax POST form submission
            'url_path' => '/.*/',
            'methods' => array('POST'),
            'isajax' => false,
            'second_limit' => 60,
            'count_limit' => 25,
            'second_wait' => 300,
        ),
        array(
            'name' => 'ajax', // ajax submission
            'url_path' => '/.*/',
            'methods' => array('GET', 'POST'),
            'isajax' => true,
            'second_limit' => 180,
            'count_limit' => 100,
            'second_wait' => 300,
        ),
        array(
            'name' => 'sendSms', // SMS sending on the Chinese site
            'url_path' => '/customer/ajax/sendSms',
            'methods' => array('GET', 'POST'),
            'isajax' => null,
            'second_limit' => 180,
            'count_limit' => 10,
            'second_wait' => 300,
        ),
        array(
            'name' => 'grna-search',
            'url_path' => '/gRNA-detail/search',
            'methods' => array('POST'),
            'isajax' => null,
            'second_limit' => 120,
            'count_limit' => 60,
            'second_wait' => 300,
        ),
        array(
            'name' => 'product-documents',
            'url_path' => '/product/documents',
            'methods' => array('GET'),
            'isajax' => null,
            'second_limit' => 120,
            'count_limit' => 50,
            'second_wait' => 300,
        ),
        array(
            'name' => 'product-documents-down',
            'url_path' => '/product/documents/down',
            'methods' => array('GET', 'POST'),
            'isajax' => null,
            'second_limit' => 120,
            'count_limit' => 50,
            'second_wait' => 300,
        ),
        array(
            'name' => 'location-php',
            'url_path' => '/location.php',
            'methods' => array('GET', 'POST'),
            'isajax' => null,
            'second_limit' => 120,
            'count_limit' => 40, // 3-second countdown, 120 / 3 = 40
            'second_wait' => 300,
        ),
    );

    /**
     * Lazily creates the single instance on first use.
     *
     * @return static
     */
    public static function getInstance()
    {
        if (null === static::$instance) {
            static::$instance = new static();
        }

        return static::$instance;
    }

    /**
     * Not callable from outside, to prevent multiple instances.
     * Obtain the instance through static::getInstance().
     */
    private function __construct()
    {
    }

    public function __destruct()
    {
    }

    /**
     * Prevents cloning (which would create a second instance).
     */
    private function __clone()
    {
    }

    /**
     * Prevents unserialization (which would create a second instance).
     *
     * NOTE(review): PHP 8 emits a warning for non-public magic methods;
     * revisit the visibility if this file is ever run on PHP 8+.
     */
    private function __wakeup()
    {
    }

    /**
     * Documentation only: sample of what is stored in the cache under
     * get_cache_key($ip). Never called by the limiter itself.
     *
     * @return array
     */
    private function example_cache_data()
    {
        return array(
            // Key is CACHE_RATELIMIT_PREFIX . client IP.
            'c_rl_127.0.0.1' => array(
                'bypass' => false, // true once the IP matched the host whitelist
                'host' => '',      // `host` output, used to exempt Google/Baidu crawlers
                'pageview' => array(     // key is the rule's 'name'
                    'timestamp' => time(), // start of the current counting window
                    'count' => 123,        // hits inside the current window
                    'lock' => 0,           // times locked; lock duration scales with it
                ),
                'grna-search' => array(
                    'timestamp' => time(),
                    'count' => 123,
                    'lock' => 0,
                ),
            ),
        );
    }

    /**
     * Tests one black/white list entry against a URL path.
     *
     * @param array  $op       array(type, pattern); type "regx" means preg pattern,
     *                         anything else means exact string comparison
     * @param string $url_path
     * @return bool
     */
    private static function matches_op(array $op, $url_path)
    {
        if ('regx' === $op[0]) {
            return (bool) preg_match($op[1], $url_path);
        }

        return $url_path === $op[1];
    }

    /**
     * Tells whether a URL path is exempt from counting.
     *
     * The blacklist is consulted first, so a blacklisted path is never
     * treated as whitelisted even if a whitelist entry also matches it.
     *
     * @param string $url_path
     * @return bool true when the path is whitelisted and not blacklisted
     */
    private static function is_in_whitelist($url_path)
    {
        foreach (self::$black_ops as $black_op) {
            if (self::matches_op($black_op, $url_path)) {
                return false;
            }
        }

        foreach (self::$white_ops as $white_op) {
            if (self::matches_op($white_op, $url_path)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @param string $ip
     * @return string cache key of the per-IP counter data
     */
    private static function get_cache_key($ip)
    {
        return self::CACHE_RATELIMIT_PREFIX . $ip;
    }

    /**
     * @param string $ip
     * @param string $rule_name
     * @return string cache key of the lock entry for this IP + rule
     */
    private static function get_cache_lock_key($ip, $rule_name = '')
    {
        return self::CACHE_RATELIMIT_LOCK_PREFIX . $ip . '_' . $rule_name;
    }

    /**
     * Returns the client IP used as the rate-limit identity.
     *
     * The QA environment exposes two IPs; Request::$client_ip is the more
     * specific one (compared with $_SERVER['REMOTE_ADDR']).
     *
     * @return string
     */
    private static function get_client_ip()
    {
        return Request::$client_ip;
    }

    /**
     * Runs `host <ip>` and returns the first line of its output.
     *
     * The IP is validated and shell-escaped first, because it ends up on a
     * shell command line and may originate from client-controlled data.
     *
     * NOTE(review): a reverse-DNS name alone can be forged by whoever controls
     * the PTR record; consider forward-confirming before trusting it.
     *
     * @param string $ip
     * @return string trimmed first output line, or '' when the IP is not a
     *                valid address, the command cannot be started, or it
     *                prints nothing
     */
    private static function lookup_host($ip)
    {
        if (false === filter_var($ip, FILTER_VALIDATE_IP)) {
            return '';
        }

        $fp = popen('host ' . escapeshellarg($ip), 'r');
        if (!$fp) {
            return '';
        }

        $line = trim((string) fgets($fp));
        pclose($fp);

        return $line;
    }

    /**
     * Tells whether the current request falls under a rule.
     *
     * @param Request|null $request may be null when called from a destructor;
     *                              the request is then treated as a non-ajax GET
     * @param array        $rule    one entry of self::$rules
     * @return bool
     */
    private static function check_rule_match($request, $rule)
    {
        // Request::detect_uri() is NOT the same as $request->uri()!
        $url_path = Request::detect_uri();

        if ($request) {
            $method = $request->method();
            $isajax = $request->is_ajax();
        } else {
            $method = 'GET';
            $isajax = false;
        }

        if (!in_array((string) $method, $rule['methods'], true)) {
            return false;
        }

        // isajax === null means the rule does not care about ajax-ness.
        if (null !== $rule['isajax'] && $isajax !== $rule['isajax']) {
            return false;
        }

        // A url_path that both starts and ends with '/' is a regex pattern.
        $pattern = $rule['url_path'];
        $len = strlen($pattern);
        if ($len > 1 && '/' === $pattern[0] && '/' === $pattern[$len - 1]) {
            return (bool) preg_match($pattern, $url_path);
        }

        return $url_path === $pattern;
    }

    /**
     * Counts the current request against every matching rule and writes a
     * lock entry when a rule's limit is reached.
     *
     * Best-effort: cache failures are logged (or silently skipped when the
     * cache cannot even be instantiated) and never propagate to the caller.
     *
     * @param Request $request
     * @return void
     */
    public static function addup(Request $request)
    {
        if (customer::check_inside_ip(Request::$client_ip)) {
            return;
        }

        try {
            $cache = Cache::instance();
        } catch (Exception $e) {
            return;
        }

        // Resolved before the try block so the catch below can always log it.
        $ip = self::get_client_ip();

        try {
            $url_path = Request::detect_uri();

            // 1. Whitelisted paths are skipped entirely.
            if (self::is_in_whitelist($url_path)) {
                return;
            }

            // 2. Load (or initialise) the per-IP record.
            $cache_key = self::get_cache_key($ip);
            $cache_data = $cache->get($cache_key);
            if (!$cache_data) {
                $cache_data = array('bypass' => false, 'host' => '');
            }
            if (!empty($cache_data['bypass'])) {
                return;
            }

            if (empty($cache_data['host'])) {
                $cache_data['host'] = self::lookup_host($ip);
            }

            // Host check: known crawlers bypass the limiter.
            if ('' !== $cache_data['host']) {
                foreach (self::$white_hosts as $white_host) {
                    if (false !== strpos($cache_data['host'], $white_host)) {
                        // Whitelisted host: bypass, and keep that decision for a day.
                        $cache_data['bypass'] = true;
                        $cache->set($cache_key, $cache_data, 24 * 60 * 60);
                        return;
                    }
                }
            }

            // 3. Per-rule counting.
            $update_flag = false;
            $now = time();
            foreach (self::$rules as $rule) {
                if (!self::check_rule_match($request, $rule)) {
                    continue;
                }

                $name = $rule['name'];
                $cache_lock_key = self::get_cache_lock_key($ip, $name);

                // Reset per rule so a failed read can never reuse the
                // previous rule's lock state.
                $cache_lock_data = null;
                try {
                    $cache_lock_data = $cache->get($cache_lock_key);
                } catch (Exception $e) {
                    log::start('ratelimit_error', $cache_lock_key . " - " . $e->getMessage(), __FILE__, __LINE__);
                }

                if ($cache_lock_data) {
                    // Currently locked: do not count.
                    continue;
                }

                $update_flag = true;

                if (!isset($cache_data[$name])) {
                    $cache_data[$name] = array(
                        'timestamp' => $now,
                        'count' => 1,
                        'lock' => 0,
                    );
                    continue;
                }

                if (!isset($cache_data[$name]['lock'])) {
                    $cache_data[$name]['lock'] = 0;
                }

                // Window expired: start a new one.
                if ($now - $cache_data[$name]['timestamp'] > $rule['second_limit']) {
                    $cache_data[$name]['timestamp'] = $now;
                    $cache_data[$name]['count'] = 1;
                    continue;
                }

                $cache_data[$name]['count']++;
                if ($cache_data[$name]['count'] < $rule['count_limit']) {
                    continue;
                }

                // Limit reached: lock, for longer each time it happens again.
                $cache_data[$name]['lock']++;
                $lifetime = $rule['second_wait'] * $cache_data[$name]['lock'];
                try {
                    // The stored value is the timestamp at which the lock ends.
                    $cache->set($cache_lock_key, $now + $lifetime, $lifetime);
                } catch (Exception $e) {
                    log::start('ratelimit_error', $cache_lock_key . " - " . $e->getMessage(), __FILE__, __LINE__);
                }

                log::start('ratelimit', $ip . ' - ' . $name . ' - ' . $url_path . ' - ' . $cache_data['host'] . ' - ' . json_encode($cache_data[$name]), __FILE__, __LINE__);
            }

            if ($update_flag) {
                // A lifetime larger than second_wait is preferable so that
                // lock counts can accumulate; the cache default is used here.
                try {
                    $cache->set($cache_key, $cache_data);
                } catch (Exception $e) {
                    log::start('ratelimit_error', $cache_key . " - " . $e->getMessage(), __FILE__, __LINE__);
                }

                $env = Kohana::$config->load('site.environment');
                if ('WWW' != $env) {
                    log::start('ratelimit_debug', $cache_data, __FILE__, __LINE__);
                }
            }
        } catch (Exception $e) {
            log::start('ratelimit_error', $ip . ' - ' . $e->getMessage(), __FILE__, __LINE__);
            return;
        }
    }

    /**
     * Reports whether the client is currently locked.
     *
     * By default only rules matching the current request are consulted, so a
     * lock only affects URLs covered by the rule that triggered it.
     *
     * @param Request $request
     * @param bool    $is_lockall when true, a lock on ANY rule is reported
     *                            regardless of the current request
     * @return mixed the stored lock value (the lock's end timestamp as written
     *               by addup()) when locked; null when not locked, when the
     *               client IP is an inside IP, or when the cache fails
     */
    public static function check(Request $request, $is_lockall = false)
    {
        if (customer::check_inside_ip(Request::$client_ip)) {
            return null;
        }

        try {
            $cache = Cache::instance();
        } catch (Exception $e) {
            return null;
        }

        try {
            $ip = self::get_client_ip();
            foreach (self::$rules as $rule) {
                if (!$is_lockall && !self::check_rule_match($request, $rule)) {
                    continue;
                }

                $cache_lock_data = $cache->get(self::get_cache_lock_key($ip, $rule['name']));
                if ($cache_lock_data) {
                    // Locked.
                    return $cache_lock_data;
                }
            }
        } catch (Exception $e) {
            log::start('ratelimit_error', $e->getMessage(), __FILE__, __LINE__);
        }

        return null;
    }
}