Full web page load time

This post describes a script for measuring a site's full page load time. A while back I spent a long time searching for something that could measure the complete load time of a live site, but nothing suitable turned up. After digging through GitHub for quite a while I finally found a project that roughly fits my needs, so I grabbed it and modified it. One remaining issue is that it cannot fetch resources asynchronously: every asset is downloaded sequentially.
Reference project: [https://github.com/donjajo/loady.git]
My modified code: [https://github.com/0x5c0f/zbx_page_load.git]
Extra module the script depends on: bs4 (Beautiful Soup 4.x)
Module installation:
pip3 install bs4 (or python3 -m pip install bs4; the canonical package name is beautifulsoup4)
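A quick, optional sanity check to confirm the module imports before wiring anything into Zabbix:

```python
# Quick sanity check that Beautiful Soup is installed and importable
import bs4
print(bs4.__version__)
```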

The probe script, page-load.py:

```python
#!/usr/bin/env python3
#
# UserParameter=custom.page.load[*],/opt/sh/zbx_discover_site/page-load.py $1
#
import re
import sys
import urllib.parse
from time import time

import requests
from bs4 import BeautifulSoup

debug = 0


class Loady:
    def __init__(self, url, headers=None):
        # Avoid a mutable default argument; also validate the type
        if headers is None:
            headers = {}
        if not isinstance(headers, dict):
            raise ValueError('Headers argument must be a dict instance')
        self.url = url
        self.total_time = 0
        self.total_size = 0
        self.js = []
        self.css = []
        self.img = []
        self.http_headers = headers
        self.soup = None
        # Per-instance result store: { 'js': {url: {...}}, 'css': ..., 'img': ... }
        self.files = {'js': {}, 'css': {}, 'img': {}}

    def _get(self, tag):
        """Collect the site's additional files and normalize their URLs."""
        # Scheme and domain of the main URL, used to complete relative links
        domain_scheme, domain, _, _, _, _ = urllib.parse.urlparse(self.url)
        urls = []
        if tag == 'script':
            # All <script> tags that carry a src attribute
            tags = self.soup.find_all('script', {'src': re.compile(r'.*')})
        elif tag == 'img':
            tags = self.soup.find_all('img', {'src': re.compile(r'.*')})
        else:
            # All <link rel="stylesheet"> tags
            tags = self.soup.find_all('link', {'rel': 'stylesheet'})

        for each_tag in tags:
            # Scripts and images use src, stylesheets use href
            val = each_tag['src'] if tag in ('script', 'img') else each_tag['href']
            url = urllib.parse.urlparse(val)
            if not url[0] and url[1]:
                # No scheme but a domain: a protocol-relative or bare URL,
                # so prepend the main site's scheme
                if not val.startswith('//'):
                    urls.append('{0}://{1}'.format(domain_scheme, val))
                else:
                    urls.append('{0}:{1}'.format(domain_scheme, val))
            elif not url[1]:
                # No domain: a relative path, prepend scheme and domain
                if not val.startswith('/'):
                    urls.append('{0}://{1}/{2}'.format(domain_scheme, domain, val))
                else:
                    urls.append('{0}://{1}{2}'.format(domain_scheme, domain, val))
            else:
                # Already an absolute URL, keep it as-is
                urls.append(val)

        if tag == 'script':
            self.js = urls
        elif tag == 'img':
            self.img = urls
        else:
            self.css = urls

    def _load(self, t):
        """Fetch the collected links; record response time and size in self.files."""
        if t == 'script':
            links = self.js
        elif t == 'img':
            links = self.img
        else:
            links = self.css

        for link in links:
            if debug == 1:
                print(link)
            try:
                start = time()
                r = requests.get(link)
                end = time()
                # Total time taken to load this link
                response_time = end - start
                if r.status_code == 200:
                    # Binary size for images, text size for everything else
                    size = sys.getsizeof(r.content) if t == 'img' else sys.getsizeof(r.text)
                    obj = 'css' if t == 'style' else ('img' if t == 'img' else 'js')
                    self.files[obj][link] = {'byte_size': size, 'load_time': response_time}
                    # Accumulate into the running totals
                    self.total_time += response_time
                    self.total_size += size
            except Exception as e:
                if debug == 1:
                    print(e, link)
                continue

    def get(self):
        """Load the main page, record its time and size, then fetch its assets."""
        start = time()
        r = requests.get(self.url, headers=self.http_headers)
        stop = time()
        if r.status_code == 200:
            response = r.text
            self.total_time += stop - start
            self.total_size += sys.getsizeof(response)
            self.soup = BeautifulSoup(response, 'html.parser')
            self._get('script')
            self._load('script')
            self._get('style')
            self._load('style')
            self._get('img')
            self._load('img')


load = Loady(sys.argv[1], headers={'User-Agent': 'zabbix pageload monitor'})
load.get()
print('%.3f' % load.total_time)
# print(load.total_size)  # total downloaded size
# print(load.files)       # per-file size and load time
```
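Running the script by hand is the easiest way to verify it before the Zabbix agent starts calling it. The timing below is only illustrative; the real value depends on the site and your network:

```
$ /opt/sh/zbx_discover_site/page-load.py https://www.example.com
0.912
```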

Since I use this for monitoring, I created a low-level discovery rule in Zabbix. The discovery script, discover_site.py:

```python
#!/usr/bin/env python3
# discover site: print the site list as Zabbix low-level discovery JSON
file = open("/opt/sh/zbx_discover_site/site.txt")
print("{")
print("\t\"data\":[")
try:
    lines = file.readlines()
    count = 1
    for line in lines:
        line = line.strip("\n")
        print("\t\t{")
        print("\t\t\t\"{#SITE}\":\"", end='')
        print(line, end='')
        print("\"")
        print("\t\t}", end='')
        # Comma after every entry except the last
        if count < len(lines):
            print(",")
        count = count + 1
finally:
    file.close()
print("\n\t]")
print("}")
```
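Hand-printing JSON like this breaks as soon as a URL contains a quote or backslash. As a minimal alternative sketch (same assumed file path), the standard json module produces equivalent low-level discovery output:

```python
#!/usr/bin/env python3
# Alternative sketch: build the same LLD structure with the json module
import json

with open("/opt/sh/zbx_discover_site/site.txt") as f:
    sites = [line.strip() for line in f if line.strip()]

print(json.dumps({"data": [{"{#SITE}": s} for s in sites]}, indent=4))
```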

The site list, site.txt (one URL per line):

```
https://www.example.com
http://www.example.com
```
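With those two entries, discover_site.py should emit JSON along these lines (whitespace aside):

```
{
    "data":[
        {
            "{#SITE}":"https://www.example.com"
        },
        {
            "{#SITE}":"http://www.example.com"
        }
    ]
}
```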

Zabbix frontend configuration: I simply dedicated one fixed host to this, so I don't feel much need to set it up as a template.

Create the discovery rule: Configuration → Hosts (or Templates) → Discovery → Create discovery rule, and set the key to: discover.site
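Both keys must be declared on the agent side. The custom.page.load line comes from the header of page-load.py; the discover.site line is my assumption of how the discovery script gets hooked up (adjust paths to your layout):

```
# e.g. in /etc/zabbix/zabbix_agentd.conf (file location is an assumption)
UserParameter=custom.page.load[*],/opt/sh/zbx_discover_site/page-load.py $1
UserParameter=discover.site,/opt/sh/zbx_discover_site/discover_site.py
```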

Filter configuration:

{#SITE} matches @Linux site for autodiscovery

Item prototype configuration:
Name: page load on [{#SITE}]
Key: custom.page.load[{#SITE}]
Type of information: Numeric (float)
Units: s
Update interval: 300 (my personal suggestion)

Graph prototype configuration:
Name: page load on {#SITE}

Create a new global regular expression:
Name: Linux site for autodiscovery (must match the filter configuration above)
Result is TRUE: ^((http|ftp|https)://) (ideally this would be a proper URL-matching pattern, but here I simply pass anything that starts with http/https/ftp)
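If you want something closer to a real URL check, a stricter (still imperfect) pattern could be used instead; this is just a suggestion, not what my setup uses. It rejects entries containing spaces or lacking a host:

```
^(https?|ftp)://[^ /$.?#][^ ]*$
```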

Once everything is in place, restart the zabbix agent and you are done. If you did configure it as a template, just link the template to the appropriate hosts.