nginx: installation, common commands, load-balancing configuration, and anti-crawler rules

Nginx installation

# Download the nginx 1.14.0 source tarball from nginx.org.
wget https://nginx.org/download/nginx-1.14.0.tar.gz

# Unpack the gzip-compressed archive (verbose output).
tar -zxvf nginx-1.14.0.tar.gz

# Enter the extracted source tree; the build commands are run from here.
cd nginx-1.14.0

Dependent libraries:

# C/C++ compiler toolchain needed to build nginx from source.
# -y answers the confirmation prompt automatically, like the commands below.
yum install -y gcc-c++

# PCRE library and headers (regular-expression support).
yum install -y pcre pcre-devel

# zlib library and headers (compression support).
yum install -y zlib zlib-devel

# OpenSSL library and headers (TLS support).
yum install -y openssl openssl-devel

Compile and install:

# Configure the build with the default options.
./configure

# Compile.
make

# Install the compiled files.
make install

# Expose the installed binary as /usr/bin/nginx so `nginx` can be run from any directory.
ln -s /usr/local/nginx/sbin/nginx /usr/bin/nginx

 

nginx command:
nginx (start: on Linux, running `nginx` by itself starts the server)
nginx -s stop (stop the server)
nginx -s reload (reload the configuration)

nginx load balancing configuration:

  1 #user  nobody;        # global block
  2 worker_processes  1;
  3
  4 #error_log  logs/error.log;
  5 #error_log  logs/error.log  notice;
  6 #error_log  logs/error.log  info;
  7
  8 #pid        logs/nginx.pid;
  9
 10
 11 events {        # events block
 12     worker_connections  1024;
 13 }
 14
 15
 16 http {         # http block
 17     include       mime.types;  # http global block
 18     default_type  application/octet-stream;
 19
 20     #log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
 21     #                  '$status $body_bytes_sent "$http_referer" '
 22     #                  '"$http_user_agent" "$http_x_forwarded_for"';
 23
 24     #access_log  logs/access.log  main;
 25
 26     sendfile        on;
 27     #tcp_nopush     on;
 28
 29     #keepalive_timeout  0;
 30     keepalive_timeout  65;
 31
 32     #gzip  on;
 33
 34     upstream myserver.com {     # load balancing configuration: the pool of backend servers
 35         server 127.0.0.1:8080;   # a weight can be appended to set this server's share of traffic, e.g.: server 127.0.0.1:8080 weight=1;
 36     }
 37
 38     server {    # server block
 39         # anti-crawler     # server global block
 40         include anti_spider.conf;   # load the anti-crawler rules
 41
 42         listen       80;
 43         server_name  211.67.160.21;
 44
 45         #charset koi8-r;
 46
 47         #access_log  logs/host.access.log  main;
 48
 49         location ~* ^.+$ {  # location block; NOTE(review): this regex matches every URI and is the first regex location, so the regex locations further down are never selected
 50             proxy_pass http://myserver.com;   # forward the request to the upstream group defined above
 51             allow all;
 52         }
 53
 54         error_page 404 https://www.baidu.com;   # answer 404 errors with a redirect to this URL
 55
 56         #error_page  404              /404.html;
 57
 58         # redirect server error pages to the static page /50x.html
 59         #
 60         error_page   500 502 503 504  /50x.html;
 61         location = /50x.html {
 62             root   html;
 63         }
 64
 65         # proxy the PHP scripts to Apache listening on 127.0.0.1:80
 66         #
 67         #location ~ \.php$ {
 68         #    proxy_pass   http://127.0.0.1;
 69         #}
 70
 71         # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
 72         #
 73         #location ~ \.php$ {
 74         #    root           html;
 75         #    fastcgi_pass   127.0.0.1:9000;
 76         #    fastcgi_index  index.php;
 77         #    fastcgi_param  SCRIPT_FILENAME  /scripts$fastcgi_script_name;
 78         #    include        fastcgi_params;
 79         #}
 80
 81         # deny access to .htaccess files, if Apache's document root
 82         # concurs with nginx's one
 83         #
 84         #location ~ /\.ht {
 85         #    deny  all;
 86         #}
 87
 88         location ~* ^.+\.(html|jpg|jpeg|gif|png|ico|css|js)$
 89         {  # static file types; NOTE(review): shadowed by the earlier catch-all regex location
 90             root D:/register;  # NOTE(review): Windows-style path - confirm for the target host
 91             expires 30d;
 92             break;
 93         }
 94
 95         location ~ ^/static/ {  # NOTE(review): shadowed by the earlier catch-all regex location
 96             root D:/register;  # NOTE(review): Windows-style path - confirm for the target host
 97             expires 30d;
 98             break;
 99         }
100
101         location ~ ^/ {  # NOTE(review): shadowed by the earlier catch-all regex location
102             fastcgi_pass 127.0.0.1:80;  # NOTE(review): port 80 is also this server's own listen port - confirm the FastCGI backend address
103             fastcgi_param PATH_INFO $fastcgi_script_name;
104             fastcgi_param REQUEST_METHOD $request_method;
105             fastcgi_param QUERY_STRING $query_string;
106             fastcgi_param CONTENT_TYPE $content_type;
107             fastcgi_param CONTENT_LENGTH $content_length;
108             fastcgi_param SERVER_PROTOCOL $server_protocol;
109             fastcgi_param SERVER_PORT $server_port;
110             fastcgi_param SERVER_NAME $server_name;
111             fastcgi_pass_header Authorization;
112             fastcgi_intercept_errors off;
113         }
114     }
115
116
117     # another virtual host using mix of IP-, name-, and port-based configuration
118     #
119     #server {
120     #    listen       8000;
121     #    listen       somename:8080;
122     #    server_name  somename  alias  another.alias;
123
124     #    location / {
125     #        root   html;
126     #        index  index.html index.htm;
127     #    }
128     #}
129
130
131     # HTTPS server
132     #
133     #server {
134     #    listen       443 ssl;
135     #    server_name  localhost;
136
137     #    ssl_certificate      cert.pem;
138     #    ssl_certificate_key  cert.key;
139
140     #    ssl_session_cache    shared:SSL:1m;
141     #    ssl_session_timeout  5m;
142
143     #    ssl_ciphers  HIGH:!aNULL:!MD5;
144     #    ssl_prefer_server_ciphers  on;
145
146     #    location / {
147     #        root   html;
148     #        index  index.html index.htm;
149     #    }
150     #}
151
152 }
  • 1. Global block: directives that affect nginx as a whole. Typically the user/group that runs the nginx server, the path of the nginx process pid file, the log paths, inclusion of other configuration files, the number of worker processes allowed, and so on.
  • 2. events block: configuration that affects the network connections between the nginx server and its users: the maximum number of connections per process, which event-driven model is used to handle connection requests, whether a worker may accept multiple network connections at the same time, whether accepting connections is serialized across workers, and so on.
  • 3. http block: can contain multiple server blocks; most features (proxying, caching, log definitions) and third-party modules are configured here, e.g. file inclusion, MIME-type definitions, custom log formats, whether to use sendfile to transfer files, connection timeouts, the number of requests allowed per connection, and so on.
  • 4. server block: the parameters of a virtual host; one http block may contain multiple server blocks.
  • 5. location block: configures request routing and how the various pages are handled.

 

 

nginx anti-crawler configuration (anti_spider.conf):

1  # Block scraping tools such as Scrapy (case-insensitive match on the User-Agent header)
2  if ($http_user_agent ~* (Scrapy|Curl|HttpClient)) {
3      return 403;
4  }
5
6  # Block the listed User-Agents (case-sensitive match) and requests with an empty User-Agent (the trailing ^$ alternative)
7  if ($http_user_agent ~ "WinHttp|WebZIP|FetchURL|node-superagent|java/|FeedDemon|Jullo|JikeSpider|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|Java|Feedly|Apache-HttpAsyncClient|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|lightDeckReports Bot|YYSpider|DigExt|HttpClient|MJ12bot|heritrix|EasouSpider|Ezooms|BOT/0.1|YandexBot|FlightDeckReports|Linguee Bot|^$" ) {
8      return 403;
9  }
10
11  # Block requests whose method is not GET, HEAD or POST
12  if ($request_method !~ ^(GET|HEAD|POST)$) {
13      return 403;
14  }
15
16  # To block a single IP:
17  #deny 123.45.6.7;
18  # To block the whole range 123.0.0.1 - 123.255.255.254:
19  #deny 123.0.0.0/8;
20  # To block the range 123.45.0.1 - 123.45.255.254:
21  #deny 123.45.0.0/16;
22  # To block the range 123.45.6.1 - 123.45.6.254:
23  #deny 123.45.6.0/24;
24
25  # The following are all rogue IPs
26  #deny 58.95.66.0/24;

 

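Common junk User-Agent list:

> FeedDemon: content scraping
> BOT/0.1 (BOT for JCE): SQL injection
> CrawlDaddy: SQL injection
> Java: content scraping
> Jullo: content scraping
> Feedly: content scraping
> UniversalFeedParser: content scraping
> ApacheBench: CC attack tool
> Swiftbot: useless crawler
> YandexBot: useless crawler
> AhrefsBot: useless crawler
> YisouSpider: useless crawler
> jikeSpider: useless crawler
> MJ12bot: useless crawler
> ZmEu: phpMyAdmin vulnerability scanning
> WinHttp: scraping / CC attack
> EasouSpider: useless crawler
> HttpClient: TCP attack
> Microsoft URL Control: scanning
> YYSpider: useless crawler
> jaunty: WordPress brute-force scanner
> oBot: useless crawler
> Python-urllib: content scraping
> Indy Library: scanning
> FlightDeckReports Bot: useless crawler
> Linguee Bot: useless crawler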

Guess you like

Origin www.cnblogs.com/ice-image/p/10983953.html