web全局速率限制,实现url白名单,url黑名单,host白名单,rules等过滤规则,仅供参考。

代码实现

使用 cache记录请求数据,进行判断。

<?php
/**
 * Global, cache-backed request rate limiter.
 *
 * addup() counts the current request against every matching entry of $rules
 * and, once a rule's count_limit is reached inside its second_limit window,
 * writes a lock entry to the cache. check() reports whether the client
 * currently has such a lock.
 *
 * The code is kept compatible with PHP 5.5 (see the note on $rules): no
 * constant expressions in property initialisers and no PHP 7+ syntax.
 */
class ratelimit
{
    /**
     * @var static
     */
    private static $instance;

    /**
     * Returns the singleton, creating it lazily on first use.
     *
     * @return static
     */
    public static function getInstance()
    {
        if (null === static::$instance) {
            static::$instance = new static();
        }

        return static::$instance;
    }

    /**
     * Private so that instances can only be obtained through getInstance().
     */
    private function __construct()
    {

    }

    public function __destruct()
    {
    }

    /**
     * Prevents cloning, which would create a second instance.
     */
    private function __clone()
    {
    }

    /**
     * Prevents unserialization, which would create a second instance.
     *
     * The engine invokes __wakeup() regardless of its visibility, so an empty
     * private method does not block anything (and PHP 8 warns about non-public
     * magic methods). Throwing is what actually rejects the copy.
     *
     * @throws Exception always
     */
    public function __wakeup()
    {
        throw new Exception('Cannot unserialize singleton ' . __CLASS__);
    }

    const CACHE_RATELIMIT_PREFIX = 'c_rl_';         // per-IP statistics entry
    const CACHE_RATELIMIT_LOCK_PREFIX = 'c_rl_l_';  // per-IP, per-rule lock entry

    /**
     * Documentation only: illustrates the shape of the per-IP cache entry
     * that addup() reads and writes. Never called; returns nothing.
     */
    private function example_cache_data(){
        $cache_data = [
            'c_rl_127.0.0.1' => [               // key = CACHE_RATELIMIT_PREFIX . ip
                'bypass' => false,              // true once the host matched $white_hosts
                'host' => '',                   // first line printed by `host <ip>`, used to exempt crawlers
                'pageview'=>[                   // one entry per matched rule, keyed by the rule's name
                    'timestamp' => time(),      // start of the current counting window
                    'count' => 123,             // requests counted in the current window
                    'lock' => 0,                // number of locks so far; multiplies second_wait
                ],
                'grna-search' => [
                    'timestamp' => time(),
                    'count' => 123,
                    'lock' => 0,
                ]
            ]
        ];
    }

    // URL path blacklist: these paths are always counted, even when they also
    // match the whitelist below. Mostly concrete URLs.
    private static $black_ops = array(
        array("string", '/customer/ajax/login'),
        array("string", '/customer/signup/sent_code_email'),
        array("string", '/customer/ajax/signup'),
        array("string", '/customer/ajax/sendSms'),
        array("string", '/product/documents/down'),
        array("string", '/gRNA-detail/search'),
        array("string", '/location.php'),
    );

    // URL path whitelist: matching paths are not counted at all.
    private static $white_ops = array(
        array("regx", '/^\/api\/.*/'),
        array("regx", '/^\/quick_order\/.*/'),
        array("regx", '/^\/customer\/.*/'),
        array("string", '/ajax/formhash'),
        array("string", '/customer/cart/count'),
        array("string", '/customer/ajax/get_state_by_country'),
    );

    // Host whitelist (search engine crawlers), matched against the reverse
    // DNS line of the client IP.
    private static $white_hosts = array(
        '.googlebot.com.',
        '.google.com.',
        '.googleusercontent.com.',  // Google cache
        '.crawl.baidu.com.',
        '.applebot.apple.com.',
        '.search.msn.com.',
    );

    // Rate limit rules. Keep the list short (the original author notes that
    // Cloudflare itself only offers 15 rules).
    //   name         key of the rule's counters inside the per-IP cache entry
    //   url_path     exact path, or a regex when it starts and ends with '/'
    //   methods      HTTP methods the rule applies to
    //   isajax       true/false to require (non-)ajax, null to ignore
    //   second_limit length of the counting window in seconds
    //   count_limit  requests within the window that trigger a lock
    //   second_wait  base lock duration in seconds (multiplied by lock count)
    private static $rules = array(
        array(
            'name' => 'pageview',   // page views
            'url_path' => '/.*/',
            'methods' => array('GET'),
            'isajax' => false,
            'second_limit' => 180,   // guess: 100 hits in 3 minutes filters scrapers/attacks better than 30 in 1 minute
            'count_limit' => 100,
            'second_wait' => 300,   // note: production runs PHP 5.5, which cannot write this as 5 * 60
        ),
        array(
            'name' => 'postform',   // non-ajax POST form submissions
            'url_path' => '/.*/',
            'methods' => array('POST'),
            'isajax' => false,
            'second_limit' => 60,
            'count_limit' => 25,
            'second_wait' => 300,
        ),
        array(
            'name' => 'ajax',   // ajax requests
            'url_path' => '/.*/',
            'methods' => array('GET','POST'),
            'isajax' => true,
            'second_limit' => 180,
            'count_limit' => 100,
            'second_wait' => 300,
        ),
        array(
            'name' => 'sendSms',   // SMS sending on the Chinese site
            'url_path' => '/customer/ajax/sendSms',
            'methods' => array('GET','POST'),
            'isajax' => null,
            'second_limit' => 180,
            'count_limit' => 10,
            'second_wait' => 300,
        ),
        array(
            'name' => 'grna-search',
            'url_path' => '/gRNA-detail/search',
            'methods' => array('POST'),
            'isajax' => null,
            'second_limit' => 120,
            'count_limit' => 60,
            'second_wait' => 300,
        ),
        array(
            'name' => 'product-documents',
            'url_path' => '/product/documents',
            'methods' => array('GET'),
            'isajax' => null,
            'second_limit' => 120,
            'count_limit' => 50,
            'second_wait' => 300,
        ),
        array(
            'name' => 'product-documents-down',
            'url_path' => '/product/documents/down',
            'methods' => array('GET', 'POST'),
            'isajax' => null,
            'second_limit' => 120,
            'count_limit' => 50,
            'second_wait' => 300,
        ),
        array(
            'name' => 'location-php',
            'url_path' => '/location.php',
            'methods' => array('GET', 'POST'),
            'isajax' => null,
            'second_limit' => 120,
            'count_limit' => 40,    // 3-second countdown, 120 / 3 = 40
            'second_wait' => 300,
        )
    );

    /**
     * Tests a path against one list entry of the form array(type, pattern).
     *
     * @param array  $op        array('regx', <PCRE pattern>) or array('string', <exact path>)
     * @param string $url_path  request path
     * @return bool
     */
    private static function matches_op($op, $url_path){
        if ('regx' == $op[0]){
            return (bool) preg_match($op[1], $url_path);
        }
        return $url_path === $op[1];
    }

    /**
     * Tells whether a path is exempt from counting.
     *
     * The blacklist is consulted first, so a blacklisted path is never
     * treated as whitelisted even if a whitelist pattern also matches it.
     *
     * @param string $url_path  request path
     * @return bool  true when the path must be skipped by the limiter
     */
    private static function is_in_whitelist($url_path){
        foreach (self::$black_ops as $black_op){
            if (self::matches_op($black_op, $url_path)){
                return false;
            }
        }

        foreach (self::$white_ops as $white_op){
            if (self::matches_op($white_op, $url_path)){
                return true;
            }
        }
        return false;
    }

    /**
     * Cache key of the per-IP statistics entry.
     */
    private static function get_cache_key($ip){
        return self::CACHE_RATELIMIT_PREFIX . $ip;
    }

    /**
     * Cache key of the lock entry for one IP and one rule.
     */
    private static function get_cache_lock_key($ip, $rule_name = ''){
        return self::CACHE_RATELIMIT_LOCK_PREFIX . $ip. '_' . $rule_name;
    }

    /*
        Per the original author: the QA environment has two IPs and
        Request::$client_ip is the more specific one, compared with
        $_SERVER['REMOTE_ADDR'].
     */
    private static function get_client_ip(){
        return Request::$client_ip;
    }

    /**
     * Runs `host <ip>` and returns the first line of its output, trimmed.
     *
     * The IP may originate from a client-controlled header, so it is validated
     * as an IP literal and shell-escaped before it reaches the command line.
     *
     * @param string $ip
     * @return string  '' when $ip is not a valid IP or the command yields nothing
     */
    private static function lookup_host($ip){
        if (false === filter_var($ip, FILTER_VALIDATE_IP)){
            return '';
        }

        $fp = popen('host ' . escapeshellarg($ip), 'r');
        if (!$fp){
            return '';
        }
        $line = trim((string) fgets($fp));
        pclose($fp);

        return $line;
    }

    /**
     * Tells whether a reverse DNS line contains one of the whitelisted crawler
     * host suffixes (anywhere except at offset 0).
     *
     * NOTE(review): the reverse lookup is not forward-confirmed, so whoever
     * controls the PTR record of an IP can obtain the bypass — consider
     * verifying the name resolves back to the same IP.
     *
     * @param string $host  line as returned by lookup_host()
     * @return bool
     */
    private static function is_white_host($host){
        foreach (self::$white_hosts as $white_host){
            if (strpos($host, $white_host) > 0){
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the current request falls under a rule.
     *
     * The path comes from Request::detect_uri(), which the original author
     * notes is NOT the same as $request->uri().
     *
     * @param Request|null $request  may be null (the original author mentions
     *                               destructor contexts); then GET / non-ajax
     *                               is assumed
     * @param array        $rule     one entry of self::$rules
     * @return bool
     */
    private static function check_rule_match($request, $rule){
        $url_path = Request::detect_uri();

        if ($request){
            $method = $request->method();
            $isajax = $request->is_ajax();
        }else{
            $method = 'GET';
            $isajax = false;
        }

        if (!in_array($method, $rule['methods'], true)){
            return false;
        }
        // A null 'isajax' means the rule does not care about ajax-ness.
        if (!is_null($rule['isajax']) && $isajax !== $rule['isajax']){
            return false;
        }

        // A url_path that both starts and ends with '/' is treated as a regex,
        // anything else as an exact path.
        $pattern = $rule['url_path'];
        $len = strlen($pattern);
        if ($len>1 && $pattern[0]=='/' && $pattern[$len-1]=='/'){
            return (bool) preg_match($pattern, $url_path);
        }
        return $url_path == $pattern;
    }

    /**
     * Counts the current request against every matching rule.
     *
     * Skipped entirely for inside IPs (customer::check_inside_ip), whitelisted
     * paths, clients already flagged 'bypass' and clients whose reverse DNS
     * matches $white_hosts. Failures are logged and swallowed: rate limiting
     * is best-effort and must not break the request.
     *
     * @param Request $request
     * @return void
     */
    public static function addup(Request $request){
        if(customer::check_inside_ip(Request::$client_ip)){
            return;
        }

        try {
            $cache = Cache::instance();
        }catch (Exception $e){
            return;
        }

        // Defined up front so the catch handler below can always log it.
        $ip = '';
        try {
            $url_path = Request::detect_uri();
            // 1. Whitelisted paths are not counted.
            if (self::is_in_whitelist($url_path)){
                return;
            }

            // 2. Load (or start) the per-IP entry.
            $ip = self::get_client_ip();
            $cache_key = self::get_cache_key($ip);

            $cache_data = $cache->get($cache_key);
            if (!$cache_data || !is_array($cache_data)){
                $cache_data = array();
            }
            // Guarantee the keys read below exist, whatever was cached.
            $cache_data += array('bypass' => false, 'host' => '');

            if ($cache_data['bypass']){
                return;
            }

            if (!$cache_data['host']){
                $cache_data['host'] = self::lookup_host($ip);
            }

            // Crawlers on the host whitelist bypass the limiter; remember the
            // decision for a day.
            if ($cache_data['host'] && self::is_white_host($cache_data['host'])){
                $cache_data['bypass'] = true;
                $cache->set($cache_key, $cache_data, 24*60*60);
                return;
            }

            // 3. Per-rule counting.
            $update_flag = false;
            foreach (self::$rules as $rule){

                if (!self::check_rule_match($request, $rule)){
                    continue;
                }

                $name = $rule['name'];
                $cache_lock_key = self::get_cache_lock_key($ip, $name);
                // Reset on every iteration: a failed lookup must not reuse the
                // lock state of the previously processed rule.
                $cache_lock_data = null;
                try {
                    $cache_lock_data = $cache->get($cache_lock_key);
                }catch (Exception $e){
                    log::start('ratelimit_error', $cache_lock_key . " - " . $e->getMessage(), __FILE__, __LINE__);
                }

                if ($cache_lock_data){
                    // Currently locked: do not count.
                    continue;
                }

                $update_flag = true;
                $now = time();

                if (!isset($cache_data[$name])){
                    $cache_data[$name] = array(
                        'timestamp' => $now,
                        'count' => 1,
                        'lock' => 0
                    );
                    continue;
                }

                $stat = $cache_data[$name];
                if (!isset($stat['lock'])){
                    $stat['lock'] = 0;
                }

                if ($now - $stat['timestamp'] > $rule['second_limit']){
                    // Window expired: start a new one with this request.
                    $stat['timestamp'] = $now;
                    $stat['count'] = 1;
                    $cache_data[$name] = $stat;
                    continue;
                }

                $stat['count']++;
                if ($stat['count'] >= $rule['count_limit']){
                    // Limit reached: lock, waiting longer on every repeat.
                    $stat['lock']++;
                    $lifetime = $rule['second_wait'] * $stat['lock'];
                    try {
                        // The stored value is the lock's expiry timestamp.
                        $cache->set($cache_lock_key, $now+$lifetime, $lifetime);
                    }catch (Exception $e){
                        log::start('ratelimit_error', $cache_lock_key . " - " . $e->getMessage(), __FILE__, __LINE__);
                    }
                    log::start('ratelimit', $ip . ' - ' . $name . ' - ' . $url_path . ' - ' . $cache_data['host'] .' - ' . json_encode($stat), __FILE__, __LINE__);
                }
                $cache_data[$name] = $stat;
            }

            if ($update_flag){
                // Original author's note: the entry's lifetime should exceed
                // second_wait so that lock counts can accumulate. No lifetime
                // is passed here, so the cache default applies.
                try {
                    $cache->set($cache_key, $cache_data);
                }catch (Exception $e){
                    log::start('ratelimit_error', $cache_key . " - " . $e->getMessage(), __FILE__, __LINE__);
                }
                $env = Kohana::$config->load('site.environment');
                if ('WWW' != $env){
                    log::start('ratelimit_debug', $cache_data, __FILE__, __LINE__);
                }
            }
        }catch (Exception $e){
            log::start('ratelimit_error', $ip . ' - ' . $e->getMessage(), __FILE__, __LINE__);
            return;
        }
    }

    /**
     * Reports whether the client is currently locked.
     *
     * @param Request $request
     * @param bool    $is_lockall  false (default): only locks of rules that
     *                             match this request count; true: a lock on
     *                             any rule counts
     * @return mixed  the cached lock value (addup() stores the lock's expiry
     *                timestamp) when locked; null when not locked, for inside
     *                IPs, or when the cache is unavailable or fails
     */
    public static function check(Request $request, $is_lockall = false){
        if(customer::check_inside_ip(Request::$client_ip)){
            return null;
        }

        try {
            $cache = Cache::instance();
        }catch (Exception $e){
            return null;
        }

        try {
            $ip = self::get_client_ip();
            foreach (self::$rules as $rule){
                if (!$is_lockall && !self::check_rule_match($request, $rule)){
                    continue;
                }

                $cache_lock_data = $cache->get(self::get_cache_lock_key($ip, $rule['name']));
                if ($cache_lock_data){
                    return $cache_lock_data;
                }
            }
        }catch (Exception $e){
            log::start('ratelimit_error', $e->getMessage(), __FILE__, __LINE__);
        }

        return null;
    }
}