当前位置：首页 > news > 正文
ppt模板网站源码/微信朋友圈产品推广语

news 2025/7/12 5:06:20
ppt模板网站源码，微信朋友圈产品推广语，国内小程序最好的公司，开发小程序要多少钱。Python爬虫系列之政务网权责清单数据爬取：代码仅供学习交流，请勿用于非法用途。小程序爬虫接单、app爬虫接单、网页爬虫接单、接口定制、网站开发、小程序开发 > 点击这里联系我们 <。微信请扫描下方二维码。该爬虫使用一般的url请求库执行，未使用爬…
Python爬虫系列之政务网权责清单数据爬取

代码仅供学习交流，请勿用于非法用途
小程序爬虫接单、app爬虫接单、网页爬虫接单、接口定制、网站开发、小程序开发 > 点击这里联系我们 <

微信请扫描下方二维码

在这里插入图片描述
该爬虫使用 requests 库直接发起 HTTP 请求，未使用爬虫框架；数据库表结构（字段）未给出，仅供学习参考。

接下来是一段很长的代码：
# -*- coding:utf-8 -*-
import requests
import MySQLdb
from bs4 import BeautifulSoup
import re
import json
import time
import random
import urllib.parse'''@王磊@2019/3/12@内蒙以及下级区域政务数据爬取
'''class NeiMeng:def __init__(self):self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",}self.conn = MySQLdb.connect(user='root', password='root', database='power', charset='utf8')self.cursor = self.conn.cursor()self.indexURL = "http://zwfw.nmg.gov.cn/"def getProxies(self):'''从ip代理池中抓取一个代理ip:return: '''content = ""while True:try:url = "http://ip.11jsq.com/index.php/api/entry?method=proxyServer.generate_api_url&packid=1&fa=0&fetch_key=&qty=1&time=1&pro=&city=&port=1&format=txt&ss=1&css=&dt=1&specialTxt=3&specialJson="resp = requests.get(url, headers=self.headers)content = str(resp.content.decode("utf-8"))if "Service" in content or "code" in content or "html" in content:continueelse:return {"http": "http://" + content}except Exception as e:continuedef getHTML(self, url, headers=None, encoding="utf-8", proxies=None):'''get方式发起请求:param url::param headers::return:'''while True:try:resp = ""proxies = Noneif headers:resp = requests.get(url, headers=headers, proxies=proxies)else:resp = requests.get(url, headers=self.headers, proxies=proxies)return resp.content.decode(encoding, "ignore")except Exception as e:print(e)continuedef postHTML(self, url, data, headers=None, encoding="utf-8", proxies=None):'''post方式发起请求:param url::param data::param headers::return:'''while True:try:resp = ""if headers:resp = requests.post(url, data=data, headers=headers, proxies=proxies)else:resp = requests.post(url, data=data, headers=self.headers, proxies=proxies)return resp.content.decode("utf-8")except Exception as e:print(e)continuedef pipToMysql(self, detail, simple, url):'''数据存储入库:param detail::param simple::param url::return:'''unicode = str(abs(hash(url)))try:self.cursor.execute("insert simplelist(pid, city_id, area_id, town_id, categoryid, categoryname, deptid, deptname, title, uniquecode) values(%d, %d, %d, %d, %d, '%s', %d, '%s', '%s', '%s')"% (simple['pid'], simple['city_id'], 
simple['area_id'], simple['town_id'], simple['categoryID'], simple['categoryName'], simple['deptID'], simple['deptName'], simple['title'], unicode))self.conn.commit()self.cursor.execute("select id from simplelist where uniquecode = '%s'" % unicode)webid = self.cursor.fetchall()[0][0]self.cursor.execute("insert details(webid, detail) values(%d, '%s')" % (webid, detail))self.conn.commit()except Exception as e:passdef pipLineArea(self, areaList, pid):'''存储区域信息:param areaList::param pid::return:'''for area in areaList:city_name = list(area.keys())[0]area_list = list(area.values())[0]self.cursor.execute("insert city(pid, city_name) values(%d, '%s')" % (pid, city_name))self.conn.commit()self.cursor.execute("select * from city where pid = %d and city_name = '%s'" % (pid, city_name))city_id = self.cursor.fetchall()[0][0]for area_name in area_list:self.cursor.execute("insert area(pid, city_id, area_name) values(%d, %d, '%s')" % (pid, city_id, area_name))self.conn.commit()def getDeptList(self, deptTupleList):'''通过部门的元组列表更新部门列表库，获取指定数据格式的部门字符串列表:param deptTupleList::return:'''deptList = ""for deptTuple in deptTupleList:try:self.cursor.execute("insert deptlist(department) values('%s')" % deptTuple[1])self.conn.commit()self.cursor.execute("select * from deptlist where department = '%s'" % deptTuple[1])deptList = deptList + "" + str(self.cursor.fetchall()[0][0]) + ":" + deptTuple[1] + ","except Exception as e:self.cursor.execute("select * from deptlist where department = '%s'" % deptTuple[1])deptList = deptList + "" + str(self.cursor.fetchall()[0][0]) + ":" + deptTuple[1] + ","return deptList[:-1]def updateDeptList(self, areaType, areaID, deptList):'''更新区域级别的部门列表信息:param areaType::param areaID::param deptList::return:'''areaName = ""if areaType == 0:areaName = "province"elif areaType == 1:areaName = "city"else:areaName = "area"self.cursor.execute("update %s set deptlist = '%s' where id = %d" % (areaName, deptList, areaID))self.conn.commit()def getCategoryID(self, 
categoryName):'''通过行政类型的名称获取行政id:param categoryName::return:'''categoryID = 0try:self.cursor.execute("insert admincategory(name) values('%s')" % categoryName)self.conn.commit()self.cursor.execute("select * from admincategory where name = '%s'" % categoryName)categoryID = self.cursor.fetchall()[0][0]except Exception as e:self.cursor.execute("select * from admincategory where name = '%s'" % categoryName)categoryID = self.cursor.fetchall()[0][0]return categoryIDdef getDeptID(self, deptName, areaType, areaID):'''通过部门名称获取部门id，如果该部门不存在会将新的部门更新至该区域的部门列表字符串末尾进行追加操作:param deptName::param areaType::param areaID::return:'''self.cursor.execute("select * from deptlist where department = '%s'" % deptName)deptID = 0try:deptID = self.cursor.fetchall()[0][0]except Exception as e:self.cursor.execute("insert deptlist(department) values('%s')" % deptName)self.conn.commit()self.cursor.execute("select * from deptlist where department = '%s'" % deptName)deptID = self.cursor.fetchall()[0][0]if areaType == 0:self.cursor.execute("select * from province where id = %d" % areaID)deptlist = self.cursor.fetchall()[0][2] + "," + str(deptID) + ":" + deptNameself.cursor.execute("update province set deptlist = '%s' where id = %d" % (deptlist, areaID))elif areaType == 1:self.cursor.execute("select * from city where id = %d" % areaID)deptlist = self.cursor.fetchall()[0][3] + "," + str(deptID) + ":" + deptNameself.cursor.execute("update city set deptlist = '%s' where id = %d" % (deptlist, areaID))else:self.cursor.execute("select * from area where id = %d" % areaID)deptlist = self.cursor.fetchall()[0][4] + "," + str(deptID) + ":" + deptNameself.cursor.execute("update area set deptlist = '%s' where id = %d" % (deptlist, areaID))return deptIDdef getIDList(self, areaType, areaID):'''获取该类型区域的id列表:param areaType::param areaID::return:'''idList = {"pid": 0,"city_id": 0,"area_id": 0,"town_id": 0,}if areaType == 0:idList["pid"] = areaIDelif areaType == 1:idList["city_id"] = areaIDelif areaType == 
2:idList["area_id"] = areaIDelif areaType == 3:idList["town_id"] = areaIDreturn idList#################################################              内蒙部分                       #################################################def getAreaList(self, html):'''获取内蒙自治区的部门列表:param html::return:'''soup = BeautifulSoup(html, "html.parser")city_a = soup.find("li", attrs={"class": "qh2 clear"}).find_all("a")cityList = []for a in city_a:cityList.append(a.text)area_li = soup.find_all("li", attrs={"class": re.compile("qh3 clear.*?")})index = 0areaList = []for li in area_li:area_as = li.find_all("a")area_meta = []for area_a in area_as:area_meta.append(area_a.text)areaList.append({cityList[index]: area_meta})index = index + 1return areaListdef nmParser(self, indexURL):'''内蒙地区的数据解析:param indexURL::return:'''# 获取省份idself.cursor.execute("select * from province where province_name = '内蒙古自治区'")pid = self.cursor.fetchall()[0][0]idList = self.getIDList(0, pid)indexHTML = self.getHTML(indexURL)soup = BeautifulSoup(indexHTML, "html.parser")req = re.compile(r'<a.*?href=\'javascript:changeinfo\("","(.*?)","",""\);\' title="(.*?)">')deptTupleList = re.findall(req, str(soup.find("div", attrs={"id": "dept"})))deptList = self.getDeptList(deptTupleList)# 更新该区域的部门列表self.updateDeptList(0, pid, deptList)# 分部门请求allnums_req = re.compile(r'<span class="right_tit2_r1">共(.*?)个主项</span>')for deptTuple in deptTupleList:# 获取部门信息deptName = deptTuple[1]deptID = self.getDeptID(deptName, 0, pid)currURL = "http://zwfw.nmg.gov.cn/zwfw/sxcx/itemList/gr_list.do?webId=1&zt=&deptid=" + deptTuple[0]currHTML = self.getHTML(currURL)# 获取总的主项个数totalNums = int(re.findall(allnums_req, currHTML)[0])# 总页数totalPages = (totalNums // 12) + 1 if totalNums % 12 != 0 else (totalNums // 12)# 分页请求for page in range(1, 1 + totalPages):requURL= currURL + "&isonline=&word=&page_num=" + str(page)html = self.getHTML(requURL)page_soup = BeautifulSoup(html, "html.parser")bsznList = page_soup.find_all("a", attrs={"class": "bszn"})for bszn in 
bsznList:bsznURL = "http://zwfw.nmg.gov.cn" + bszn["href"]bsznHTML = self.getHTML(bsznURL)bsznSoup = BeautifulSoup(bsznHTML, "html.parser")# 获取行政信息categoryName = bsznSoup.find("td", attrs={"id": "sxlx"}).text[-5:].replace(" ", "")categoryID = self.getCategoryID(categoryName)# 事项标题title = bsznSoup.find("meta", attrs={"name": "ArticleTitle"})["content"]############### 列表信息 ##############simple = {}simple['pid'] = idList["pid"]simple['city_id'] = idList["city_id"]simple['area_id'] = idList["area_id"]simple['town_id'] = idList["town_id"]simple['categoryID'] = categoryIDsimple['categoryName'] = categoryNamesimple['deptID'] = deptIDsimple['deptName'] = deptNamesimple['title'] = title############### 详细信息 ##############divs = bsznSoup.find_all("div", attrs={"class": "qdhg30"})main_tab_item_divs = bsznSoup.find_all("div", attrs={"class": "main_tab_item"})# 受理条件sltj = ""try:sltj = {"title": "受理条件", "th": [], "list": [divs[0].text.replace("\n", "")]}except Exception as e:sltj = {"title": "受理条件", "th": [], "list": []}# 设定依据sdyj = ""try:sdyj = {"title": "设定依据", "th": [], "list": [divs[1].text.replace("\r", "").replace("\n", "").replace("\xa0", "")]}except Exception as e:sdyj = {"title": "设定依据", "th": [], "list": []}# 收费标准及依据sfbzjyj = ""try:sfbzjyj = {"title": "收费标准及依据", "th": [], "list": [main_tab_item_divs[3].find("div", attrs={"class": "qdhg30"}).text.replace(" ", "").replace("\n", "")]}except Exception as e:sfbzjyj = {"title": "收费标准及依据", "th": [], "list": []}# 办理流程bllc = ""try:bllc_list = []bllc_req = re.compile('(\[{.*?}\])', re.S)bllc_contents = json.loads(re.findall(bllc_req, str(bsznSoup.find("div", attrs={"id": "blhj-xml"})))[0])for bllc_content in bllc_contents:bllc_meta = {"link": bllc_content["hjmc"],"standard": bllc_content["scbz"]}bllc_list.append(bllc_meta)bllc = {"title": "办理流程", "th": ["环节", "审查标准"], "list": bllc_list}except Exception as e:bllc = {"title": "办理流程", "th": ["环节", "审查标准"], "list": []}# 流程图llt = ""try:llt = {"title": "流程图", "th": [], "list": 
["http://zwfw.nmg.gov.cn" + bsznSoup.find("img", attrs={"class": "bl-img"})["data"]]}except Exception as e:llt = {"title": "流程图", "th": [], "list": []}# 办理材料blcl = ""try:blcl_trs = main_tab_item_divs[6].find_all("tr")[1:]blcl_list = []for blcl_tr in blcl_trs:blcl_tds = blcl_tr.find_all("td")blcl_meta = {"name": blcl_tds[0].text if "下载" not in blcl_tds[0].text else blcl_tds[0].text.replace("[空表下载]", ""),"type": blcl_tds[1].text,"source": blcl_tds[2].text,"condition": blcl_tds[3].text,"nums": blcl_tds[4].text,"require": blcl_tds[5].text,"emptytable": blcl_tds[0].find("a")["href"] if blcl_tds[0].find("a") else "无","necessary": blcl_tds[6].text,}blcl_list.append(blcl_meta)blcl = {"title": "办理材料", "th": ["材料名称", "材料类型", "来源渠道", "受理标准", "材料份数", "材料要求", "空表下载", "是否必须"], "list": blcl_list}except Exception as e:blcl = {"title": "办理材料", "th": ["材料名称", "材料类型", "来源渠道", "受理标准", "材料份数", "材料要求", "空表下载", "是否必须"], "list": []}# 常见问题cjwt = ""try:cjwt_list = []cjwt_trs = main_tab_item_divs[7].find_all("tr")[1:]for cjwt_tr in cjwt_trs:cjwt_tds = cjwt_tr.find_all("td")cjwt_meta = {"question": cjwt_tds[0].text,"answer": cjwt_tds[1].text,}cjwt_list.append(cjwt_meta)cjwt = {"title": "常见问题", "th": ["问题", "暂无常见问题解答。"], "list": cjwt_list}except Exception as e:cjwt = {"title": "常见问题", "th": ["问题", "暂无常见问题解答。"], "list": []}extends = []extends.append(sltj)extends.append(sdyj)extends.append(sfbzjyj)extends.append(bllc)extends.append(llt)extends.append(blcl)extends.append(cjwt)detail = {"title": title,"extends": extends}detail = str(detail).replace("'", "\"")self.pipToMysql(detail, simple, bsznURL)#################################################              呼和浩特部分                   #################################################def gethsCurrKeys(self):'''生成呼和浩特地区访问的s和t加密数据:return:'''chars = "0123456789abcdef"curTime = str(int(random.random() * 9000 + 1000)) + "" + str(time.time())[:10] + "000"s = "a" + curTimekey = ""keyIndex = -1for i in range(6):c = s[keyIndex + 1]key = key + ckeyIndex = 
chars.index(c)if keyIndex < 0 or keyIndex >= len(s):keyIndex = it = str(int(random.random() * 9000 + 1000)) + "_" + key + "_" + str(time.time())[:10] + "000"return s, tdef hsParser(self, url, pid):'''呼和浩特区域数据解析:param url::param pid::return:'''# 获取呼和浩特的城市idself.cursor.execute("select * from city where city_name = '呼和浩特市' and pid = %d" % pid)city_id = self.cursor.fetchall()[0][0]indexHTML = self.getHTML(url)indexSoup = BeautifulSoup(indexHTML, "html.parser")index_as = indexSoup.find("ul", attrs={"class": "tabTwo-ul"}).find_all("a")# 获取该站点的子站点数据areaList = []areaCode_req = re.compile(r'gov\.cn/(.*?)/public/index')for index_a in index_as:areaCode = re.findall(areaCode_req, index_a["href"])[0]areaList.append((index_a.text, areaCode))# 分区域请求for area in areaList:areaType = 0areaID = 0idList = ""if area[0] == "呼和浩特市":areaType = 1self.cursor.execute("select * from city where pid = %d and city_name = '%s'" % (pid, area[0]))areaID = self.cursor.fetchall()[0][0]idList = self.getIDList(1, areaID)else:areaType = 2self.cursor.execute("select * from area where pid = %d and area_name = '%s'" % (pid, area[0]))areaID = self.cursor.fetchall()[0][0]idList = self.getIDList(2, areaID)listURL = "http://zwfw.huhhot.gov.cn/" + area[1] + "/icity/listintquery/qllist"proxies = self.getProxies()listResp = requests.get(listURL, headers=self.headers, proxies=proxies)# 获取该区域的cookiecookie_req = re.compile(r'(ICITYSession=.*?);')cookie = re.findall(cookie_req, listResp.headers["Set-Cookie"])[0]# 构造请求头headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0","Cookie": "fontZoomState=0; " + cookie}# 获取站点的区域ids, t = self.gethsCurrKeys()siteAreaCodeURL = "http://zwfw.huhhot.gov.cn/" + area[1] + "/api-v2/app.pmi.config.cmd.ConfigCmd/getConfigInfo?s=" + s + "&t=" + tsiteAreaCode = json.loads(self.postHTML(siteAreaCodeURL, json.dumps({"key": "WebRegion"}), headers=headers))["data"]# 获取当前区域的部门列表deptList_organ = 
""while True:try:s, t = self.gethsCurrKeys()deptURL = "http://zwfw.huhhot.gov.cn/" + area[1] + "/api-v2/app.icity.ServiceCmd/getDeptList?s=" + s + "&t=" + tdeptList_organ = json.loads(self.postHTML(deptURL, json.dumps({"region_code": siteAreaCode}), headers=headers))["data"]["organ"]breakexcept Exception as e:continuedeptTupleList = []for deptList_ in deptList_organ:deptTupleList.append((None, deptList_["NAME"]))deptList = self.getDeptList(deptTupleList)# 更新部门列表至所在区域self.updateDeptList(areaType, areaID, deptList)# 无需分部门请求，该站点存在一个“全部”类型的选项# 获取列表post请求的主页，用于获取总页数total = 0while True:try:s, t = self.gethsCurrKeys()initListURL = "http://zwfw.huhhot.gov.cn/" + area[1] + "/api-v2/app.icity.govservice.GovProjectCmd/getInitList?s=" + s + "&t=" + ttotal = json.loads(self.postHTML(initListURL, json.dumps({"region_code": siteAreaCode, "deptid": "", "page": 1, "limit": 8, "ITEM_TYPE": ""}), headers=headers))["total"]breakexcept Exception as e:continuetotalPage = total // 8 if total % 8 == 0 else (total // 8) + 1# 分页请求for page in range(1, 1 + totalPage):currListData = json.dumps({"region_code": siteAreaCode, "deptid": "", "page": page, "limit": 8, "ITEM_TYPE": ""})currListRespData = ""while True:try:s, t = self.gethsCurrKeys()currListURL = "http://zwfw.huhhot.gov.cn/" + area[1] + "/api-v2/app.icity.govservice.GovProjectCmd/getInitList?s=" + s + "&t=" + tcurrListRespData = json.loads(self.postHTML(currListURL, currListData, headers))["data"]if currListRespData:breakexcept Exception as e:continue# 分事项请求for currList in currListRespData:currURL = "http://zwfw.huhhot.gov.cn/" + area[1] + "/icity/proinfo?id=" + currList["ITEM_ID"]currHTML = ""while True:try:currHTML = self.getHTML(currURL)breakexcept Exception as e:continue# 获取item_iditem_id = re.findall(r'itemId=(.*?)"', currHTML)[0]currSoup = BeautifulSoup(currHTML, "html.parser")# 事项标题title = currSoup.find("h3").text# 获取部门信息deptName = currSoup.find("table", attrs={"class": "table table-bordered 
its-table-overview"}).find("td").textdeptID = self.getDeptID(deptName, areaType, areaID)# 行政类型categoryName = currSoup.find("nav", attrs={"class": "sidebar its-sidebar text-center"}).find("p").text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "")categoryID = self.getCategoryID(categoryName)############################     清单列表数据    ###########################simple = {}simple['pid'] = pidsimple['city_id'] = idList["city_id"]simple['area_id'] = idList["area_id"]simple['town_id'] = idList["town_id"]simple['categoryID'] = categoryIDsimple['categoryName'] = categoryNamesimple['deptID'] = deptIDsimple['deptName'] = deptNamesimple['title'] = title############################     事项详细数据    ###########################div_style = currSoup.find_all("div", attrs={"style": "padding: 0 20px; line-height: 2;"})table_class = currSoup.find_all("table", attrs={"class": "table table-bordered"})# 获取存在的表信息,生成表编号对应表地址映射table_datas = ""while True:try:s, t = self.gethsCurrKeys()tableURL = "http://zwfw.huhhot.gov.cn/" + area[1] + "/api-v2/app.icity.govservice.GovProjectCmd/getAllItemInfoByItemID?s=" + s + "&t=" + ttable_datas = json.loads(self.postHTML(tableURL, json.dumps({"itemid": item_id}), headers))["data"]if table_datas:breakexcept Exception as e:continuetableDect = {}for table_data in table_datas:if table_data["SAMPLE_NAME"]:sample = table_data["SAMPLE"]sample_name = table_data["SAMPLE_NAME"]sampleList = sample.split(";")sampleNameList = sample_name.split(";")table_meta = []table_index = 0for sample_ in sampleList:if sample_:table_meta.append("http://zwfw.huhhot.gov.cn/" + area[1] + "/bsp/uploadify?action=downloadFileToServer&path=" + sample_ + "&name=" + sampleNameList[table_index] + "&download=no")table_index = table_index + 1tableDect[table_data["CODE"]] = table_meta# 实施依据ssyj = ""try:ssyj_req = re.compile(r'：</strong>(.*?)<br', re.S)ssyj_br = re.findall(ssyj_req, str(div_style[0]))nums = len(ssyj_br) // 4ssyj_list = []for num in range(nums):startNums = num 
* 4ssyj_meta = {"index": num + 1,"name": ssyj_br[startNums],"no": ssyj_br[startNums + 1],"list": ssyj_br[startNums + 2],"content": ssyj_br[startNums + 3],}ssyj_list.append(ssyj_meta)ssyj = {"title": "实施依据", "th": ["序号", "依据名称", "法律文号", "法律条款", "依据内容"], "list": ssyj_list}except Exception as e:ssyj = {"title": "实施依据", "th": ["序号", "依据名称", "法律文号", "法律条款", "依据内容"], "list": []}# 收费（征收）的标准及依据sfbzyj = ""try:sfbzyj_req = re.compile(r'：</strong>(.*?)<strong>', re.S)sfbzyj_br = re.findall(sfbzyj_req, str(div_style[1]))nums = len(sfbzyj_br) // 3sfbzyj_list = []for num in range(nums):startNums = num * 3sfbzyj_meta = {"index": num + 1,"name": sfbzyj_br[startNums].replace(" ", "").replace("\n", "").replace("\r", ""),"basic": sfbzyj_br[startNums + 1].replace(" ", "").replace("\n", "").replace("\r", ""),"standard": sfbzyj_br[startNums + 2].replace(" ", "").replace("\n", "").replace("\r", ""),}sfbzyj_list.append(sfbzyj_meta)sfbzyj = {"title": "收费（征收）的标准及依据", "th": ["序号", "费用名称", "收费依据", "收费标准"], "list": sfbzyj_list}except Exception as e:sfbzyj = {"title": "收费（征收）的标准及依据", "th": ["序号", "费用名称", "收费依据", "收费标准"], "list": []}# 受理条件sltj = ""try:sltj_req = re.compile(r'<strong>(.*?)</strong>', re.S)sltj_list = re.findall(sltj_req, str(div_style[2]))sltj_content = ";".join(sltj_list)sltj = {"title": "受理条件", "th": [], "list": [sltj_content]}except Exception as e:sltj = {"title": "受理条件", "th": [], "list": []}# 申报材料sbcl = ""try:sbcl_list = []sbcl_trs = table_class[0].find_all("tr")[2:]for sbcl_tr in sbcl_trs:sbcl_tds = sbcl_tr.find_all("td")# 查看当前项是否包含样例表tableID = sbcl_tds[6].find("a", attrs={"type": "button"})["title"] if sbcl_tds[6].find("a", attrs={"type": "button"}) else Noneexample = ""if tableID:example = tableDect[tableID]sbcl_meta = {"name": sbcl_tds[0].text,"original": sbcl_tds[1].text.replace("\r", "").replace("\n", "").replace("\t", "").replace(" ", ""),"copy": sbcl_tds[2].text.replace("\r", "").replace("\n", "").replace("\t", "").replace(" ", ""),"format": 
sbcl_tds[3].text.replace("\r", "").replace("\n", "").replace("\t", "").replace(" ", ""),"type": sbcl_tds[4].text.replace("\r", "").replace("\n", "").replace("\t", "").replace(" ", ""),"require": sbcl_tds[5].text.replace("\r", "").replace("\n", "").replace("\t", "").replace(" ", ""),"example": example,"source": sbcl_tds[7].text.replace("\r", "").replace("\n", "").replace("\t", "").replace(" ", ""),"issuingunit": sbcl_tds[8].text.replace("\r", "").replace("\n", "").replace("\t", "").replace(" ", ""),"comm": sbcl_tds[9].text.replace("\r", "").replace("\n", "").replace("\t", "").replace(" ", ""),}sbcl_list.append(sbcl_meta)sbcl = {"title": "申报材料", "th": ["材料名称", "原件", "复印件", "材料格式", "材料类型", "签名签章要求", "样例", "材料来源", "出具单位", "备注"], "list": sbcl_list}except Exception as e:sbcl = {"title": "申报材料", "th": ["材料名称", "原件", "复印件", "材料格式", "材料类型", "签名签章要求", "样例", "材料来源", "出具单位", "备注"], "list": []}# 电子表格下载dzbgxz = ""try:dzbgxz_trs = table_class[1].find_all("tr")[1:]dzbgxz_list = []for dzbgxz_tr in dzbgxz_trs:dzbgxz_tds = dzbgxz_tr.find_all("td")dzbgxz_meta = {"no": dzbgxz_tds[0].text,"name": dzbgxz_tds[1].text,"download": tableDect[dzbgxz_tds[2]["title"]]}dzbgxz_list.append(dzbgxz_meta)dzbgxz = {"title": "电子表格下载", "th": ["文书编号", "文书名称", "表格下载"], "list": dzbgxz_list}except Exception as e:dzbgxz = {"title": "电子表格下载", "th": ["文书编号", "文书名称", "表格下载"], "list": []}# 结果材料jgcl = ""try:jgcl_trs = table_class[2].find_all("tr")[2:]jgcl_list = []for jgcl_tr in jgcl_trs:jgcl_tds = jgcl_tr.find_all("td")jgcl_meta = {"no": jgcl_tds[0].text,"name": jgcl_tds[1].text,"example": "http://zwfw.huhhot.gov.cn" + jgcl_tds[2].find("a")["href"]}jgcl_list.append(jgcl_meta)jgcl = {"title": "结果材料", "th": ["编号", "结果名称", "结果样本"], "list": jgcl_list}except Exception as e:jgcl = {"title": "结果材料", "th": ["编号", "结果名称", "结果样本"], "list": []}# 办理流程bllc = ""try:bllc_trs = table_class[3].find_all("tr")[1:]bllc_list = []for bllc_tr in bllc_trs:bllc_tds = bllc_tr.find_all("td")bllc_meta = {"linkname": 
bllc_tds[0].text,"username": bllc_tds[1].text,"timelimit": bllc_tds[2].text,"standard": bllc_tds[3].text}bllc_list.append(bllc_meta)bllc = {"title": "办理流程", "th": ["环节名称", "办理人姓名", "办理时限(工作日)", "审批标准"], "list": bllc_list}except Exception as e:bllc = {"title": "办理流程", "th": ["环节名称", "办理人姓名", "办理时限(工作日)", "审批标准"], "list": []}# 流程图llt = ""try:llt = {"title": "流程图", "th": [], "list": ["http://zwfw.huhhot.gov.cn" + currSoup.find("div", attrs={"style": "text-align:center"}).find("img")["src"]]}except Exception as e:llt = {"title": "流程图", "th": [], "list": []}extends = []extends.append(ssyj)extends.append(sfbzyj)extends.append(sltj)extends.append(sbcl)extends.append(dzbgxz)extends.append(jgcl)extends.append(bllc)extends.append(llt)detail = {"title": title,"extends": extends}detail = str(detail).replace("'", "\"")self.pipToMysql(detail, simple, currURL)#################################################              包头市部分                     #################################################def btParser(self, url, pid):'''包头市区域数据解析:param url::param pid::return:'''# 获取城市idself.cursor.execute("select * from city where city_name = '%s' and pid = %d" % ("包头市", pid))city_id = self.cursor.fetchall()[0][0]# 获取部门列表indexHTML = self.getHTML(url)indexSoup = BeautifulSoup(indexHTML, "html.parser")dept_as = indexSoup.find("div", attrs={"class": "department_box line_box"}).find_all("a")deptTupleList = []for dept_a in dept_as:deptTupleList.append(("http://www.baotou.gov.cn/xxgk/" + dept_a["href"], dept_a.text))# 获取部门列表deptList = self.getDeptList(deptTupleList)# 更新部门列表至区域表self.updateDeptList(1, city_id, deptList)# 分部门请求for deptTuple in deptTupleList:deptName = deptTuple[1]deptID = self.getDeptID(deptName, 1, city_id)# 获取当前区域的当前部门的前缀urlcurrPreURL = deptTuple[0][:deptTuple[0].rindex("/")]# 当前部门的主页htmldeptHTML = self.getHTML(deptTuple[0])# 获取行政类别列表deptSoup = BeautifulSoup(deptHTML, "html.parser")categoryType_as = deptSoup.find("div", attrs={"class": "hd"}).find_all("a")for categoryType_a in 
categoryType_as:listURL_ = currPreURL + "/" + categoryType_a["href"][:-4] + "/"listURL = currPreURL + "/" + categoryType_a["href"]categoryName = categoryType_a.text# 获取行政类别idcategoryID = self.getCategoryID(categoryName)listHTML = self.getHTML(listURL)listSoup = BeautifulSoup(listHTML, "html.parser")totalNums = Nonetry:totalNums = int(re.findall(r'共(.*?)条', listSoup.find("td", attrs={"id": "fanye39906"}).text)[0])except Exception as e:continuetotalPages = totalNums // 15 if totalNums % 15 == 0 else (totalNums // 15) + 1# 分页请求for page in range(1, totalPages + 1):currListURL = ""if page == 1:currListURL = listURLelse:currListURL = listURL_ + str(page) + "htm"currListHTML = self.getHTML(currListURL)currListSoup = BeautifulSoup(currListHTML, "html.parser")# 获取当前页的列表清单currList_lis = currListSoup.find_all("li", attrs={"id": re.compile(r'line_u2_.*?')})# 分事项请求获取详细界面for currList_li in currList_lis:detailURL = ""try:detailURL = "http://www.baotou.gov.cn" + str(re.findall(r'(/info/.*?/.*?.htm)', currList_li.find("a")["href"])[0])except Exception as e:continuedetailHTML = self.getHTML(detailURL)currSoup = BeautifulSoup(detailHTML, "html.parser")trs = currSoup.find("div", attrs={"class": "show_content"}).find_all("tr")title = ""extends = []for tr in trs:metaTitle = tr.find("td", attrs={"class": "table_title"}).text.replace(" ", "").replace("\r", "").replace("\t", "").replace("\n", "")metaContent = tr.find("td", attrs={"class": "table_text"}).text.replace(" ", "").replace("\r", "").replace("\t", "").replace("\n", "")if metaTitle == "权力名称":title = metaContentelse:extends.append({"title": metaTitle, "th": [], "list": [metaContent]})detail = {"title": title,"extends": extends}detail = str(detail).replace("'", "\"")areaIDList = self.getIDList(1, city_id)simple = {}simple['pid'] = pidsimple['city_id'] = areaIDList["city_id"]simple['area_id'] = areaIDList["area_id"]simple['town_id'] = areaIDList["town_id"]simple['categoryID'] = categoryIDsimple['categoryName'] = 
categoryNamesimple['deptID'] = deptIDsimple['deptName'] = deptNamesimple['title'] = titleself.pipToMysql(detail, simple, detailURL)#################################################              呼伦贝尔部分                   #################################################def hlbeParser(self, url, pid):'''呼伦贝尔数据解析:param url::param pid::return:'''# 获取城市idself.cursor.execute("select * from city where city_name = '%s' and pid = %d" % ("呼伦贝尔市", pid))city_id = self.cursor.fetchall()[0][0]areaIDList = self.getIDList(1, city_id)indexHTML = self.getHTML(url)indexSoup = BeautifulSoup(indexHTML, "html.parser")index_as = indexSoup.find("div", attrs={"class": "qz-box"}).find_all("a")deptTupleList = []for index_a in index_as:deptTupleList.append(("http://www.hlbe.gov.cn" + index_a["href"], index_a.text))# 获取部门列表deptList = self.getDeptList(deptTupleList)# 更新部门列表至所在区域self.updateDeptList(1, city_id, deptList)# 分部门请求for deptTuple in deptTupleList:# 获取部门信息deptName = deptTuple[1]deptID = self.getDeptID(deptName, 1, city_id)# 获取部门主页数据，优先抓取行政类型deptIndexHTML = self.getHTML(deptTuple[0])deptIndexSoup = BeautifulSoup(deptIndexHTML, "html.parser")deptIndex_as = deptIndexSoup.find("div", attrs={"class": "top"}).find_all("a")# 分行政类型请求for deptIndex_a in deptIndex_as:listURL = "http://www.hlbe.gov.cn" + deptIndex_a["href"]# 获取行政信息categoryName = deptIndex_a.textcategoryID = self.getCategoryID(categoryName)# 获取总页数listHTML = self.getHTML(listURL)listSoup = BeautifulSoup(listHTML, "html.parser")pageOB = listSoup.find("span", attrs={"class": "currentpage"})totalPages = 1if pageOB:totalPages = int(re.findall(r'/ (.*)', pageOB.text)[0])# 分页请求for page in range(1, totalPages + 1):currListURL = listURL + "&page=" + str(page)currListHTML = self.getHTML(currListURL)currListSoup = BeautifulSoup(currListHTML, "html.parser")currList_as = currListSoup.find("div", attrs={"class": "list"}).find_all("a")# 分事项请求for currList_a in currList_as:title = currList_a.textdetailURL = "http://www.hlbe.gov.cn" + 
def tlGetDetail(self, url, areaType, areaID, pid):
    """Parse one Tongliao service-detail page and hand it to ``pipToMysql``.

    The page is fetched with ``self.getHTML`` and parsed with BeautifulSoup.
    If ``div.table_box`` contains no ``tr`` rows, the page is treated as an
    intermediate one: the first link under ``div.business_list`` is followed
    and that page is parsed instead.

    :param url: address of the service page
    :param areaType: region level, forwarded to ``getDeptID`` / ``getIDList``
    :param areaID: region id, forwarded to ``getDeptID`` / ``getIDList``
    :param pid: province id stored in the summary record
    :return: None
    :raises AttributeError: if a mandatory element (``title``,
        ``div.table_box``, ``td.td_width`` ...) is missing from the page
    :raises IndexError: if ``div.table_box`` has fewer than five rows

    NOTE: the collapsed source lost its indentation; the block structure
    below is reconstructed from statement order.
    """

    def strip_chars(text, chars):
        # Delete every character of ``chars`` from ``text`` in a single pass
        # (equivalent to chaining ``.replace(c, "")`` for each character).
        return text.translate(str.maketrans("", "", chars))

    def link_or_empty(cell):
        # href of the first <a> in a table cell, or "" when there is no link.
        anchor = cell.find("a")
        return anchor["href"] if anchor else ""

    def section(title_, th, build):
        # Every detail section is best-effort: any failure while scraping it
        # (missing tag, short table, absent attribute) yields an empty list
        # so the remaining sections are still stored.
        try:
            items = build()
        except Exception:
            items = []
        return {"title": title_, "th": th, "list": items}

    detailSoup = BeautifulSoup(self.getHTML(url), "html.parser")
    # The title is read from the first page fetched, before any redirect.
    title = detailSoup.find("title").text
    detail_trs = detailSoup.find("div", attrs={"class": "table_box"}).find_all("tr")
    if not detail_trs:
        # Not the final page yet: follow the link to the real detail page.
        url = detailSoup.find("div", attrs={"class": "business_list"}).find("a")["href"]
        detailSoup = BeautifulSoup(self.getHTML(url), "html.parser")
        detail_trs = detailSoup.find("div", attrs={"class": "table_box"}).find_all("tr")

    # Administrative category (first row) and department (fifth row).
    categoryName = detail_trs[0].find("td", attrs={"class": "td_width"}).text
    categoryID = self.getCategoryID(categoryName)
    deptName = detail_trs[4].find("td", attrs={"class": "td_width"}).text
    deptID = self.getDeptID(deptName, areaType, areaID)
    areaIDList = self.getIDList(areaType, areaID)

    # ---- summary record ----
    simple = {
        'pid': pid,
        'city_id': areaIDList["city_id"],
        'area_id': areaIDList["area_id"],
        'town_id': areaIDList["town_id"],
        'categoryID': categoryID,
        'categoryName': categoryName,
        'deptID': deptID,
        'deptName': deptName,
        'title': title,
    }

    # ---- detail sections ----
    divs = detailSoup.find_all("div", attrs={"class": "tab_list"})

    def acceptance_conditions():
        block = detailSoup.find("div", attrs={"class": "tab_list_text tab_list"})
        return [strip_chars(block.text, " \r\n")]

    def material_rows():
        rows = detailSoup.find("table", attrs={"class": "falv_table"}).find_all("tr")[1:]
        materials = []
        for row in rows:
            tds = row.find_all("td")
            materials.append({
                "index": tds[0].text,
                "name": tds[1].text,
                "type": strip_chars(tds[2].text, "\r\t\n "),
                "spec": tds[3].text,
                "source": strip_chars(tds[4].text, "\r\t\n "),
                "nums": tds[5].text,
                "example": link_or_empty(tds[6]),
                "table": link_or_empty(tds[7]),
                "know": tds[8].text,
            })
        return materials

    def flow_chart():
        return [detailSoup.find("img", attrs={"id": "imageSrc"})["src"]]

    def legal_basis():
        hidden = detailSoup.find("div", attrs={"style": "display: none;text-align : left"})
        return [strip_chars(hidden.text, " \r\n\t\u3000")]

    def fee_basis():
        return [strip_chars(divs[3].text, " \r\n\t\u3000")]

    def step_rows():
        steps = []
        for row in divs[5].find_all("tr")[1:]:
            tds = row.find_all("td")
            steps.append({
                "index": tds[0].text,
                "name": tds[1].text,
                "timeLimit": strip_chars(tds[2].text, " \r\n\t"),
                "user": tds[3].text,
                "ifSpec": strip_chars(tds[4].text, " \r\n\t"),
                "proType": tds[5].text,
            })
        return steps

    extends = [
        section("受理条件", [], acceptance_conditions),
        section("办理材料",
                ["序号", "材料名称", "材料类型", "材料规格", "来源渠道", "材料份数", "材料样表", "电子表格", "填报须知"],
                material_rows),
        section("办理流程图", [], flow_chart),
        section("设定依据", [], legal_basis),
        section("收费依据和标准", [], fee_basis),
        section("办理环节",
                ["序号", "环节名称", "办理时限", "办理人", "是否允许特别程序", "特别程序类型"],
                step_rows),
    ]

    detail = {"title": title, "extends": extends}
    # NOTE(review): repr-with-quotes-swapped is not strict JSON, but it is kept
    # unchanged because it also removes every single quote from the text and
    # pipToMysql appears to interpolate this value into a quoted SQL string -
    # confirm before switching to json.dumps.
    detail = str(detail).replace("'", "\"")
    print(detail)
    self.pipToMysql(detail, simple, url)
re.compile(r'"name":"(.*?)"', re.S)siteList = re.findall(deptList_req, indexHTML)deptTupleList = []for site in siteList:deptTupleList.append((None, site))return deptTupleListdef tlParser(self, url, pid):'''通辽市数据解析:param url::param pid::return:'''# 补插一个自治区级别没有插入的区数据self.cursor.execute("select * from area where area_name = '通辽经济技术开发区' and pid = %d" % pid)tljjjskfqAreaInfo = self.cursor.fetchall()if not tljjjskfqAreaInfo:self.cursor.execute("select * from city where city_name = '%s' and pid = %d" % ('通辽市', pid))tl_city_id = self.cursor.fetchall()[0][0]try:self.cursor.execute("insert area(pid, city_id, area_name) values(%d, %d, '%s')" % (pid, tl_city_id, '通辽经济技术开发区'))self.conn.commit()except Exception as e:pass# 获取区域数据areaURL = "http://zwfw.tongliao.gov.cn/mhwz/portal/default/js/site/site.js"areaHTML = self.getHTML(areaURL)area_req = re.compile(r'siteDictJson = ({1:.*?}});', re.S)area_req_ = re.compile(r'{".*?"}', re.S)areaList_ = re.findall(area_req_, str(re.findall(area_req, areaHTML)[0]))areaList = []for _ in areaList_:_ = json.loads(_)areaList.append(("http://zwfw.tongliao.gov.cn/mhwz/portal/bszn/gr/query.shtml?siteId=" + str(_["oid"]) + "&random=0.1354591534285403&seach=&pageNumber=1", _["siteName"]))areaList.append(("http://hlgl.tlzw.gov.cn:8091/portal/bszn/gr/query.shtml?siteId=1&random=0.4388905260083682&pageNumber=1", "霍林郭勒市"))# 分区域请求for area in areaList:areaType = 0areaID = 0if area[1] == "通辽市":areaType = 1self.cursor.execute("select * from city where city_name = '%s'" % area[1])areaID = self.cursor.fetchall()[0][0]else:areaType = 2self.cursor.execute("select * from area where area_name = '%s'" % area[1])areaID = self.cursor.fetchall()[0][0]# 更新该区域的部门列表deptTupleList = ""if area[1] == '霍林郭勒市':deptTupleList = self.hlglGetDeptTuple(area)else:deptTupleList = self.tlGetDeptTuple(area)deptList = self.getDeptList(deptTupleList)self.updateDeptList(areaType, areaID, deptList)# 可以直接获取到所有数据，无需分类请求# 获取总页数indexHTML = json.loads(self.getHTML(area[0]))totalPages = 
def cfPipLineTownInfo(self, url, city_id, pid):
    """Scrape the Chifeng township index page and insert its towns into ``town``.

    Every nested ``div`` under ``div.S_Coucities`` except the first is
    treated as one district: its ``h2`` text is looked up in ``area`` to get
    the district id, and the text of every ``a`` inside it is inserted as a
    town row.

    :param url: address of the page listing districts and their towns
    :param city_id: id of the owning city, used for the lookup and the insert
    :param pid: province id written to every inserted row
    :return: None
    :raises AttributeError: if ``div.S_Coucities`` or a district ``h2`` is missing
    :raises IndexError: if a district name has no matching ``area`` row
    """
    # Names come from a scraped page, so they are passed as bound parameters
    # instead of being formatted into the SQL text (a quote in a name would
    # otherwise break or alter the statement).
    select_area_sql = "select * from area where city_id = %s and area_name = %s"
    insert_town_sql = "insert town(pid, city_id, area_id, town_name) values(%s, %s, %s, %s)"

    indexSoup = BeautifulSoup(self.getHTML(url), "html.parser")
    district_divs = indexSoup.find("div", attrs={"class": "S_Coucities"}).find_all("div")[1:]
    for district in district_divs:
        area_name = district.find("h2").text
        self.cursor.execute(select_area_sql, (city_id, area_name))
        area_id = self.cursor.fetchall()[0][0]
        for town in district.find_all("a"):
            try:
                self.cursor.execute(insert_town_sql, (pid, city_id, area_id, town.text))
            except Exception as e:
                # Best-effort insert: report the failed row and keep going
                # rather than dropping it silently.
                print(e)
                continue
        # NOTE: the collapsed source does not show which loop level the commit
        # belonged to; committing once per district persists the same rows.
        self.conn.commit()
pid::return:'''# 获取赤峰市城市idself.cursor.execute("select * from city where city_name = '赤峰市' and pid = %d" % pid)city_id = self.cursor.fetchall()[0][0]# 获取乡镇数据并进行存储self.cfPipLineTownInfo(url, city_id, pid)# 因为只有两个地区有数据，所以直接用死的urlareaList = [("http://zwfw.chifeng.gov.cn/grbsListIndex.action?type=grTheme&districtId=150400&sysDict.dictId=40288105474c99a001474cc14f1b0021", "赤峰市"),("http://zwfw.chifeng.gov.cn/grbsListIndex.action?type=grTheme&districtId=150402&sysDict.dictId=40288105474c99a001474cc14f1b0021", "红山区")]# 分区域解析for area in areaList:# 获取区域信息areaType = 0areaID = 0if area[1] == "赤峰市":areaType = 1self.cursor.execute("select * from city where city_name = '%s' and pid = %d" % (area[1], pid))areaID = self.cursor.fetchall()[0][0]else:areaType = 2self.cursor.execute("select * from area where area_name = '%s' and pid = %d" % (area[1], pid))areaID = self.cursor.fetchall()[0][0]areaIDList = self.getIDList(areaType, areaID)listHTML = self.getHTML(area[0])listSoup = BeautifulSoup(listHTML, "html.parser")# 获取部门列表deptList_ = listSoup.find("div", attrs={"class": "m_b"}).find_all("div", attrs={"class": "category_list"})deptTupeList = []for depts in deptList_:depts_as = depts.find_all("a")for depts_a in depts_as:try:dept_req = re.compile(r"\('grDepartmen','(.*?)','(.*?)'\)", re.S)dept_param = re.findall(dept_req, depts_a["onclick"])[0]deptTupeList.append(("http://zwfw.chifeng.gov.cn/grbsListContent.action?type=grDepartmen&districtId=" + dept_param[1] + "&serviceBase.orgId=" + dept_param[0], depts_a.text))except Exception as e:continue# 更新该区域的部门列表deptList = self.getDeptList(deptTupeList)self.updateDeptList(areaType, areaID, deptList)# 分部门请求for deptTupe in deptTupeList:# 获取部门信息deptName = deptTupe[1]deptID = self.getDeptID(deptName, areaType, areaID)deptIndexHTML = self.getHTML(deptTupe[0])deptIndexSoup = BeautifulSoup(deptIndexHTML, "html.parser")# 获取总页数totalPage = 1try:totalPage = int(deptIndexSoup.find("div", attrs={"class": "pagebar735"}).find_all("a")[3].text.replace("ҳ", 
""))except Exception as e:pass# 分页请求for page in range(1, totalPage + 1):currListURL = deptTupe[0] + "&page=" + str(page)currListHTML = self.getHTML(currListURL)currListSoup = BeautifulSoup(currListHTML, "html.parser")currList_req = re.compile(r"shouwGuide\(\'(.*?)\',\'(.*?)\'\)", re.S)currList = re.findall(currList_req, currListHTML)# 分事项请求for metaList in currList:detailURL = "http://zwfw.chifeng.gov.cn/guideIndex.action?districtId=" + metaList[1] + "&serviceBase.serviceId=" + metaList[0]detailHTML = self.getHTML(detailURL)print(detailURL)detailSoup = BeautifulSoup(detailHTML, "html.parser")# 获取行政类型categoryName = detailSoup.find("div", attrs={"class": "guide_con"}).find("td", attrs={"class": "td2"}).text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "")categoryID = self.getCategoryID(categoryName)# 获取事项标题title = detailSoup.find("h2").text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "").replace("\xa0", "")###################  列表信息    ##################simple = {}simple['pid'] = pidsimple['city_id'] = areaIDList["city_id"]simple['area_id'] = areaIDList["area_id"]simple['town_id'] = areaIDList["town_id"]simple['categoryID'] = categoryIDsimple['categoryName'] = categoryNamesimple['deptID'] = deptIDsimple['deptName'] = deptNamesimple['title'] = title###################  详细信息    ##################divs = detailSoup.find("div", attrs={"class": "tab_1"}).find_all("div", attrs={"class": "qh"})# 服务对象fwdx = ""try:fwdx = {"title": "服务对象", "th": [], "list": [divs[0].find("p").text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "").replace("\xa0", "")]}except Exception as e:fwdx = {"title": "服务对象", "th": [], "list": []}# 申报条件sbtj = ""try:sbtj = {"title": "申报条件", "th": [], "list": [divs[1].find("p").text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "")]}except Exception as e:sbtj = {"title": "申报条件", "th": [], "list": []}# 申报材料sbcl = ""try:sbcl_trs = divs[2].find_all("tr")[1:]sbcl_list = []for 
sbcl_tr in sbcl_trs:sbcl_tds = sbcl_tr.find_all("td")ifOnline = "否"try:if sbcl_tds[3].find("input")["checked"]:ifOnline = "是"except Exception as e:passifElec = "否"try:if sbcl_tds[4].find("input")["checked"]:ifElec = "是"except Exception as e:passstructure = ""try:if sbcl_tds[5].find_all("input")[0]["checked"]:structure = "图片"except Exception as e:try:if sbcl_tds[5].find_all("input")[1]["checked"]:structure = "文档"except Exception as e:structure = "其他"sbcl_meta = {"name": sbcl_tds[0].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"form": sbcl_tds[1].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"require": sbcl_tds[2].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"ifOnline": ifOnline,"ifElec": ifElec,"structure": structure}sbcl_list.append(sbcl_meta)sbcl = {"title": "申报材料", "th": ["材料名称", "材料形式", "材料详细要求", "是否可网上提交", "是否支持电子档", "电子档格式"], "list": sbcl_list}except Exception as e:sbcl = {"title": "申报材料", "th": ["材料名称", "材料形式", "材料详细要求", "是否可网上提交", "是否支持电子档", "电子档格式"], "list": []}# 服务表格下载fwbgxz = ""try:fwbgxz_trs = divs[3].find_all("tr")[1:]fwbgxz_list = []for fwbgxz_tr in fwbgxz_trs:fwbgxz_tds = fwbgxz_tr.find_all("td")eleName_ = re.findall(r"downloadDocAddr\('(.*?)','(.*?)'\)", fwbgxz_tds[1].find("a")["onclick"])[0]exampleTable_ = re.findall(r"downloadDocAddr\('(.*?)','(.*?)'\)", fwbgxz_tds[2].find("a")["onclick"])[0]eleName = ""exampleTable = ""if eleName_[1]:eleName = "http://zwfw.chifeng.gov.cn/FileDownload?fileid=" + eleName_[0].replace(" ", "") + "&dispname=" + eleName_[1]if exampleTable_[1]:exampleTable = "http://zwfw.chifeng.gov.cn/FileDownload?fileid=" + exampleTable_[0].replace(" ", "") + "&dispname=" + exampleTable_[1]fwbgxz_meta = {"name": fwbgxz_tds[0].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"http://zwfw.chifeng.gov.cn/FileDownload?fileid=913&dispname=前往港澳通行证办事指南""eleName": eleName,"exampleTable": 
def getXLGLMAreaList(self):
    """Fetch and parse the Xilingol League ``site.js`` region definitions.

    ``site.js`` holds two JavaScript object literals: ``siteDictJson``
    (every site, with ``oid`` and ``siteName``) and ``jiedaoSiteJson``
    (site number -> list of subordinate town records).

    :return: ``(siteList, siteList_)`` where ``siteList_`` is the list of
        ``(oid, siteName)`` string tuples found in ``siteDictJson`` and
        ``siteList`` is a list of ``((oid, siteName), towns)`` pairs that
        starts with the league itself (no towns) followed by one entry per
        key of ``jiedaoSiteJson``
    :raises IndexError: if either literal is not found in the file, or a
        ``jiedaoSiteJson`` key has no entry at that position in ``siteDictJson``
    """
    site_js = self.getHTML("http://zwfw.xlgl.gov.cn/portal/default/js/site/site.js")

    # (oid, siteName) pairs, in file order.
    site_dict_text = re.findall(r'siteDictJson = ({1:{.*?}});', site_js, re.S)[0]
    siteList_ = re.findall(r'"oid":(.*?),".*?"siteName":"(.*?)"', site_dict_text, re.S)

    # jiedaoSiteJson uses bare integer keys, which JSON forbids; the replace
    # chain wraps each key in double quotes before decoding.
    town_text = re.findall(r'jiedaoSiteJson = ({.*?}]});', site_js, re.S)[0]
    towns_by_site = json.loads(
        town_text.replace("{2", "{\"2").replace("],", "],\"").replace(":[", "\":[")
    )

    siteList = [(('1', '锡林郭勒盟'), [])]
    # Key k of jiedaoSiteJson is matched to the k-th entry of siteDictJson
    # (1-based position, not an oid lookup).
    siteList.extend(
        (siteList_[int(site_no) - 1], towns) for site_no, towns in towns_by_site.items()
    )
    return siteList, siteList_
tab_list"}).find('p').text]}except Exception as e:sltj = {"title": "受理条件", "th": [], "list": []}# 办理材料blcl = ""try:blcl_trs = divs[1].find_all("tr")[1:]blcl_list = []for blcl_tr in blcl_trs:blcl_tds = blcl_tr.find_all("td")emptytable = "无"if blcl_tds[6].find("a"):emptytables = re.findall(r"download\('(.*?)','(.*?)'\)", blcl_tds[6].find("a")["href"])emptytable = "http://zwfw.xlgl.gov.cn/portal/bszn/download.shtml?attaOid=" + emptytables[0][0] + "&fileName=" + emptytables[0][1]eleTable = "无"if blcl_tds[7].find("a"):eleTables = re.findall(r"download\('(.*?)','(.*?)'\)", blcl_tds[7].find("a")["href"])eleTable = "http://zwfw.xlgl.gov.cn/portal/bszn/download.shtml?attaOid=" + eleTables[0][0] + "&fileName=" + eleTables[0][1]blcl_meta = {"name": blcl_tds[0].text,"type": blcl_tds[1].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"basic": blcl_tds[2].text,"nums": blcl_tds[3].text,"source": blcl_tds[4].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"standard": blcl_tds[5].text,"emptytable": emptytable,"eleTable": eleTable,}blcl_list.append(blcl_meta)blcl = {"title": "办理材料", "th": ["材料名称", "材料类型", "材料规格", "材料份数", "来源渠道", "受理标准", "材料样表", "电子表格", "填报须知"], "list": blcl_list}except Exception as e:blcl = {"title": "办理材料", "th": ["材料名称", "材料类型", "材料规格", "材料份数", "来源渠道", "受理标准", "材料样表", "电子表格", "填报须知"], "list": []}# 办理流程图lct = ""try:lct = {"title": "办理流程图", "th": [], "list": [divs[2].find("img")["src"]]}except Exception as e:lct = {"title": "办理流程图", "th": [], "list": []}# 设定依据sdyj = ""try:sdyj = {"title": "设定依据", "th": [], "list": [divs[3].find("pre").text.replace("\u3000", "")]}except Exception as e:sdyj = {"title": "设定依据", "th": [], "list": []}# 收费依据和标准sfyjbz = ""try:sfyjbz = {"title": "收费依据和标准", "th": [], "list": [divs[4].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "")]}except Exception as e:sfyjbz = {"title": "收费依据和标准", "th": [], "list": []}# 办理环节blhj = ""try:blhj_trs = 
def xlglmParser(self, url, pid):
    """Crawl every Xilingol League site and store each service detail page.

    For each site returned by ``getXLGLMAreaList`` the method resolves the
    matching database id, refreshes that region's department list, then
    pages through the service listing and passes every service URL to
    ``getXLGLMDetail``.

    :param url: not used by this method (kept for the common parser signature)
    :param pid: province id used in every lookup and forwarded to
        ``getXLGLMDetail``
    :return: None
    :raises IndexError: if the city or a site has no matching database row
    """
    portal = "http://zwfw.xlgl.gov.cn/portal/"
    dept_name_re = re.compile(r"name: '(.*?)'", re.S)

    # City lookup; fails fast with IndexError when the city row is missing.
    self.cursor.execute(
        "select * from city where pid = %s and city_name = %s", (pid, '锡林郭勒盟')
    )
    city_id = self.cursor.fetchall()[0][0]

    siteList = self.getXLGLMAreaList()
    # One-off seeding of the town table (run once, then leave disabled):
    # self.pipXLGLMLineArea(siteList[0], pid, city_id)

    for site in siteList[1]:
        site_no = int(site[0])
        # Site 1 is the league itself, 2..14 are districts, everything else is
        # looked up as a town. Names are scraped, so they are passed as bound
        # parameters; the LIKE pattern drops the last character of the name.
        if site_no == 1:
            areaType = 1
            self.cursor.execute(
                "select * from city where city_name = %s and pid = %s", (site[1], pid)
            )
        elif 15 > site_no > 1:
            areaType = 2
            self.cursor.execute(
                "select * from area where area_name like %s and pid = %s",
                ("%" + site[1][:-1] + "%", pid),
            )
        else:
            areaType = 3
            self.cursor.execute(
                "select * from town where town_name like %s and pid = %s",
                ("%" + site[1][:-1] + "%", pid),
            )
        areaID = self.cursor.fetchall()[0][0]

        # Department names are embedded in a JS-like string under "data".
        dept_text = json.loads(
            self.postHTML(portal + "orgService/orgListJson.shtml", {"siteId": site[0]})
        )["data"]
        deptTupleList = [(None, name) for name in dept_name_re.findall(dept_text)]
        deptList = self.getDeptList(deptTupleList)
        self.updateDeptList(areaType, areaID, deptList)

        list_url = (portal + "sunshine/query.shtml?random=0.8224049550642463&siteId="
                    + site[0] + "&sxlx=&directoryName=&pageNumber=")
        detail_url = portal + "bszn/view.shtml?serviceOid="

        # Page 1 supplies the page count and is reused as the first listing
        # page instead of being downloaded a second time.
        first_page = json.loads(self.getHTML(list_url + "1"))
        for page in range(1, first_page["maxPageNumber"] + 1):
            listing = first_page if page == 1 else json.loads(self.getHTML(list_url + str(page)))
            for entry in listing["datas"]:
                children = entry["sxDirectoryChildList"]
                if children:
                    # Lazy generator: child entries are read in the same order
                    # as the detail pages are requested.
                    services = (svc for child in children for svc in child["serviceList"])
                else:
                    services = entry["serviceList"]
                for svc in services:
                    self.getXLGLMDetail(detail_url + svc["serviceOid"], areaType, areaID, pid)
def pipLineWLCBArea(self, areaList, pid, city_id):
    """Insert the Ulanqab towns of every district into the ``town`` table.

    :param areaList: iterable of ``((area_id, area_name), towns)`` pairs where
        ``towns`` is a list of ``(town_id, town_name)`` tuples, as built by
        ``getWLCBAreaList``
    :param pid: province id written to every inserted row
    :param city_id: city id used for the district lookup and the insert
    :return: None

    A district whose lookup or insert fails is reported and skipped; the
    remaining districts are still processed.
    """
    # Names originate from a remote API, so they are sent as bound parameters
    # rather than formatted into the SQL text.
    select_area_sql = "select * from area where area_name = %s and city_id = %s"
    insert_town_sql = "insert town(pid, city_id, area_id, town_name) values(%s, %s, %s, %s)"

    for areas in areaList:
        try:
            area_name = areas[0][1]
            self.cursor.execute(select_area_sql, (area_name, city_id))
            # IndexError here means the district is unknown to the database.
            area_id = self.cursor.fetchall()[0][0]
            for town in areas[1]:
                self.cursor.execute(insert_town_sql, (pid, city_id, area_id, town[1]))
                # Commit row by row so rows inserted before a failure persist.
                self.conn.commit()
        except Exception as e:
            # Best-effort: report why this district was skipped, then move on.
            print(e)
            continue
json.loads(self.getHTML("http://zwfw.wulanchabu.gov.cn/personal/listshixiang?currentPage=1&showCount=8&userType=4154&area=" + site[0] + "&isLevel=true&tm=" + str(time.time()).replace(".", "")[:-4]))["result"]["totalPage"]# 分页请求for page in range(1, totalPage + 1):currListURL = "http://zwfw.wulanchabu.gov.cn/personal/listshixiang?currentPage=" + str(page) + "&showCount=8&userType=4154&area=" + site[0] + "&isLevel=true&tm=" + str(time.time()).replace(".", "")[:-4]currList = json.loads(self.getHTML(currListURL).replace("\xa0", ""))["result"]["data"]# 取出每个大标题for currMeta in currList:catalogId = currMeta["catalogId"]# 分事项请求for shiXiangSubs in currMeta["shiXiangSubs"]:qshfjzrxxId = shiXiangSubs["qshfjzrxxId"]title = shiXiangSubs["mlqdsxmc"]detailURL = "http://zwfw.wulanchabu.gov.cn/personal/gettopicmenudetail/" + catalogId + "/" + qshfjzrxxId + "?tm=" + str(time.time()).replace(".", "")[:-4]detailJson = json.loads(self.getHTML(detailURL))["result"]["data"]# 获取行政类型信息categoryName = detailJson["qshfjzrxx"]["CATALOGUESTYPE"]categoryID = self.getCategoryID(categoryName)# 获取部门信息deptName = detailJson["qshfjzrxx"]["TOPORG"]deptID = self.getDeptID(deptName, areaType, areaID)# 获取区域id列表areaIDList = self.getIDList(areaType, areaID)##################  列表信息   #################simple = {}simple['pid'] = pidsimple['city_id'] = areaIDList["city_id"]simple['area_id'] = areaIDList["area_id"]simple['town_id'] = areaIDList["town_id"]simple['categoryID'] = categoryIDsimple['categoryName'] = categoryNamesimple['deptID'] = deptIDsimple['deptName'] = deptNamesimple['title'] = title##################  详细信息   ################## 受理条件sltj = ""try:sltj = {"title": "受理条件", "th": [], "list": [detailJson["sqtjhxz"]["SLTJNR"].replace("\t", "")]}except Exception as e:sltj = {"title": "受理条件", "th": [], "list": []}# 流程图lct = ""try:lct = {"title": "流程图", "th": [], "list": [detailJson["blhj"]["BLLCT"]]}except Exception as e:lct = {"title": "流程图", "th": [], "list": []}# 办理流程bllc = ""try:bllc = {"title": "办理流程", 
"th": [], "list": [detailJson["blhj"]["BLLCMS"]]}except Exception as e:bllc = {"title": "办理流程", "th": [], "list": []}# 申请材料sqcl = ""try:sqcl_clqd = detailJson["clqd"]sqcl_list = []for clqd in sqcl_clqd:clqdChild = clqd["clqdChild"]require = ""if clqdChild:require = "介质要求："for clqdChild_ in clqdChild:require = require + clqdChild_["chooseValue"] + " "if clqd["FYJFS"]:require = require + "<br />复印件份数：" + str(clqd["FYJFS"])if clqd["CLFS"]:require = require + "<br />原件份数：" + str(clqd["CLFS"])sqcl_meta = {"name": clqd["CLNAME"],"require": require,"source": clqd["LYQDNAME"],"know": clqd["TBXZ"],"emptyTab": clqd["KONGBIAOPATH"][0]["FILEPATH"] if clqd["KONGBIAOPATH"] else "","exampleTab": clqd["YANGBENPATH"][0]["FILEPATH"] if clqd["YANGBENPATH"] else ""}sqcl_list.append(sqcl_meta)sqcl = {"title": "申请材料", "th": ["材料名称", "材料要求", "来源", "填表须知", "空表下载", "样表下载"], "list": sqcl_list}except Exception as e:sqcl = {"title": "申请材料", "th": ["材料名称", "材料要求", "来源", "填表须知", "空表下载", "样表下载"], "list": []}# 咨询监督zxjd = ""try:zxjd_list = {}zxjd_content = detailJson["blfs"]if zxjd_content["HOTLINE"] != '0':zxjd_list["HOTLINE"] = zxjd_content["HOTLINE"].replace("\t", "")else:zxjd_list["HOTLINE"] = "无"if zxjd_content["SUPERVISORY_TELEPHONE"] != '0':zxjd_list["SUPERVISORY_TELEPHONE"] = zxjd_content["SUPERVISORY_TELEPHONE"].replace("\t", "")else:zxjd_list["SUPERVISORY_TELEPHONE"] = "无"zxjd = {"title": "咨询监督", "th": ["咨询方式", "监督方式"], "list": zxjd_list}except Exception as e:zxjd = {"title": "咨询监督", "th": ["咨询方式", "监督方式"], "list": []}# 收费许可sfxk = ""try:sfxk_sfxx = detailJson["sfxx"]sfxk_list = []for sfxx in sfxk_sfxx:sfxk_meta = {"name": sfxx["SFNAME"],"type": sfxx["SFTYPE"],"style": sfxx["SFFS"],"standard": sfxx["SFBZ"],"base": sfxx["SFYJ"],"ifConv": sfxx["IS_JM"],"comm": sfxx["JMSM"]}sfxk_list.append(sfxk_meta)sfxk = {"title": "收费许可", "th": ["收费项目名称", "收费种类", "收费方式", "收费标准", "收费依据", "是否减免", "减免说明"], "list": sfxk_list}except Exception as e:sfxk = {"title": "收费许可", "th": ["收费项目名称", "收费种类", "收费方式", 
"收费标准", "收费依据", "是否减免", "减免说明"], "list": []}# 设定依据sdyj = ""try:sdyj = {"title": "设定依据", "th": [], "list": [detailJson["qshfjzrxx"]["SDBASIS"]]}except Exception as e:sdyj = {"title": "设定依据", "th": [], "list": []}# 常见问题cjwt = ""try:cjwts = detailJson["cjwt"]cjwt_list = []for cjwts_ in cjwts:cjwt_meta = {"event": cjwts_["WTBT"],"comm": cjwts_["WTJD"],}cjwt_list.append(cjwt_meta)cjwt = {"title": "常见问题", "th": ["事项", "说明"], "list": cjwt_list}except Exception as e:cjwt = {"title": "常见问题", "th": ["事项", "说明"], "list": []}extends = []extends.append(sltj)extends.append(lct)extends.append(bllc)extends.append(sqcl)extends.append(zxjd)extends.append(sfxk)extends.append(sdyj)extends.append(cjwt)detail = {"title": title,"extends": extends}detail = str(detail).replace("'", "\"")self.pipToMysql(detail, simple, detailURL)#################################################              巴彦淖尔市部分                  #################################################def getBYCAreaList(self):'''获取区域列表:return:'''siteHTML = self.getHTML("http://zwzx.bynr.gov.cn/portal/default/js/site/site.js")site_req = re.compile(r'({1:{.*?}})', re.S)siteLists = json.loads(re.findall(site_req, siteHTML)[0].replace(":{", "\":{").replace("},", "},\"").replace("{1", "{\"1"))areaList = []for site in siteLists:areaList.append((siteLists[site]["oid"], siteLists[site]["siteName"]))return areaListdef getBYCLMDetail(self, url, areaType, areaID, pid):'''获取事项详细信息:param url::param areaType::param areaID::param pid::return:'''detailHTML = self.getHTML(url)detailSoup = BeautifulSoup(detailHTML, "html.parser")# 获取标题title = detailSoup.find("title").textdetail_trs = detailSoup.find("div", attrs={"class": "table_box"}).find_all("tr")# 获取部门信息deptName = detail_trs[5].find("td", attrs={"class": "td_width"}).textdeptID = self.getDeptID(deptName, areaType, areaID)# 获取行政类型信息categoryName = detail_trs[0].find("td", attrs={"class": "td_width"}).textcategoryID = self.getCategoryID(categoryName)# 获取区域id列表areaIDList = 
self.getIDList(areaType, areaID)#####################    列表信息    ####################simple = {}simple['pid'] = pidsimple['city_id'] = areaIDList["city_id"]simple['area_id'] = areaIDList["area_id"]simple['town_id'] = areaIDList["town_id"]simple['categoryID'] = categoryIDsimple['categoryName'] = categoryNamesimple['deptID'] = deptIDsimple['deptName'] = deptNamesimple['title'] = title#####################    详细信息    ####################divs = detailSoup.find_all("div", attrs={"class": "tab_list"})# 受理条件sltj = ""try:sltj = {"title": "受理条件", "th": [], "list": [detailSoup.find("div", attrs={"class": "tab_list_text tab_list"}).find('p').text]}except Exception as e:sltj = {"title": "受理条件", "th": [], "list": []}# 办理材料blcl = ""try:blcl_trs = divs[1].find_all("tr")[1:]blcl_list = []for blcl_tr in blcl_trs:blcl_tds = blcl_tr.find_all("td")table = "无"if blcl_tds[5].find("a"):table = blcl_tds[5].find("a")["href"]eleTable = "无"if blcl_tds[6].find("a"):eleTable = blcl_tds[6].find("a")["href"]blcl_meta = {"name": blcl_tds[0].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"type": blcl_tds[1].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"nums": blcl_tds[2].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"source": blcl_tds[3].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"standard": blcl_tds[4].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"table": table,"eleTable": eleTable,"neccessary": blcl_tds[7].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),}blcl_list.append(blcl_meta)blcl = {"title": "办理材料", "th": ["材料名称", "材料类型", "材料份数", "来源渠道", "受理标准", "材料样表", "电子表格", "是否必须"], "list": blcl_list}except Exception as e:blcl = {"title": "办理材料", "th": ["材料名称", "材料类型", "材料份数", "来源渠道", "受理标准", "材料样表", "电子表格", "是否必须"], "list": []}# 办理流程图lct = ""try:lct = {"title": "办理流程图", "th": [], "list": [divs[2].find("img")["src"]]}except 
Exception as e:lct = {"title": "办理流程图", "th": [], "list": []}# 设定依据sdyj = ""try:sdyj = {"title": "设定依据", "th": [], "list": [divs[3].text.replace("\u3000", "").replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "").replace("\xa0", "")]}except Exception as e:sdyj = {"title": "设定依据", "th": [], "list": []}# 收费依据和标准sfyjbz = ""try:sfyjbz = {"title": "收费依据和标准", "th": [], "list": [divs[4].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "")]}except Exception as e:sfyjbz = {"title": "收费依据和标准", "th": [], "list": []}# 办理环节blhj = ""try:blhj_trs = divs[5].find_all("tr")[1:]blhj_list = []for blhj_tr in blhj_trs:blhj_tds = blhj_tr.find_all("td")blhj_meta = {"name": blhj_tds[0].text,"timeLimit": blhj_tds[1].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"ifSpec": blhj_tds[2].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"porType": blhj_tds[3].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),}blhj_list.append(blhj_meta)blhj = {"title": "办理环节", "th": ["环节名称", "办理时限", "是否允许特别程序", "特别程序类型"], "list": blhj_list}except Exception as e:blhj = {"title": "办理环节", "th": ["环节名称", "办理时限", "是否允许特别程序", "特别程序类型"], "list": []}extends = []extends.append(sltj)extends.append(blcl)extends.append(lct)extends.append(sdyj)extends.append(sfyjbz)extends.append(blhj)detail = {"title": title,"extends": extends}detail = str(detail).replace("'", "\"")self.pipToMysql(detail, simple, url)def bycParser(self, url, pid):'''巴彦淖尔区域数据解析:param url::param pid::return:'''# 获取城市idself.cursor.execute("select * from city where city_name = '巴彦淖尔市' and pid = %d" % pid)city_id = self.cursor.fetchall()[0][0]# 获取区域列表areaList = self.getBYCAreaList()# 分区域请求for area in areaList:# 获取区域信息areaName = area[1]areaType = 0areaID = 0if areaName == "巴彦淖尔市":areaType = 1areaID = city_idelse:areaType = 2try:self.cursor.execute("select * from area where area_name = '%s'" % areaName)areaID = 
self.cursor.fetchall()[0][0]except Exception as e:continue# 列表首页urlindexURL = "http://zwzx.bynr.gov.cn/portal/sunshine/list.shtml?siteId=" + str(area[0])indexHTML = self.getHTML(indexURL)indexSoup = BeautifulSoup(indexHTML, "html.parser")# 获取部门列表dept_as = indexSoup.find("div", attrs={"class": "list_txt"}).find_all("a")deptTupleList = []for dept_a in dept_as:deptTupleList.append((None, dept_a.text))deptList = self.getDeptList(deptTupleList)# 更新该区域的部门列表self.updateDeptList(areaType, areaID, deptList)# 获取总页数totalPage = json.loads(self.getHTML("http://zwzx.bynr.gov.cn/portal/sunshine/query.shtml?random=0.6549039205102518&siteId=" + str(area[0]) + "&sxlx=&directoryName=&pageNumber=1"))["maxPageNumber"]for page in range(1, totalPage + 1):currListURL = "http://zwzx.bynr.gov.cn/portal/sunshine/query.shtml?random=0.6549039205102518&siteId=" + str(area[0]) + "&sxlx=&directoryName=&pageNumber=" + str(page)currListResp = json.loads(self.getHTML(currListURL))["datas"]for currListMeta in currListResp:if currListMeta["sxDirectoryChildList"]:for sxDirectoryChildList in currListMeta["sxDirectoryChildList"]:for serviceList in sxDirectoryChildList["serviceList"]:detailURL = "http://zwzx.bynr.gov.cn/portal/bszn/view.shtml?serviceOid=" + serviceList["serviceOid"]self.getBYCLMDetail(detailURL, areaType, areaID, pid)else:for serviceList in currListMeta["serviceList"]:detailURL = "http://zwzx.bynr.gov.cn/portal/bszn/view.shtml?serviceOid=" + serviceList["serviceOid"]self.getBYCLMDetail(detailURL, areaType, areaID, pid)#################################################              乌海市部分                     #################################################def getWHAreaList(self):'''获取区域列表:return:'''siteURL = "http://zwdt.wuhai.gov.cn/portal/default/js/site/site.js"siteHTML = self.getHTML(siteURL)site_req = re.compile(r'({10:{.*?}})', re.S)siteList = json.loads(re.findall(site_req, siteHTML)[0].replace(":{", "\":{").replace("},", "},\"").replace("{10", "{\"10"))areaList = []for site in 
siteList:areaList.append((str(siteList[site]["oid"]), siteList[site]["siteName"]))return areaListdef whParser(self, url, pid):'''乌海市数据解析:param url::param pid::return:'''# 获取城市idself.cursor.execute("select * from city where city_name = '%s' and pid = %d" % ('乌海市', pid))city_id = self.cursor.fetchall()[0][0]areaIDList = self.getIDList(1, city_id)# 获取分部门的请求地址deptListHTML = self.getHTML(url, encoding="gbk")deptListSoup = BeautifulSoup(deptListHTML, "html.parser")deptList_divs = deptListSoup.find_all("div", attrs={"class": "item"})deptTupleList = []for deptList_div in deptList_divs:deptTupleList.append((url + deptList_div.find("a")["href"], deptList_div.find("a").text))deptList = self.getDeptList(deptTupleList)# 更新所在区域的部门列表self.updateDeptList(1, city_id, deptList)# 分部门请求for deptTuple in deptTupleList:currDeptListHTML = self.getHTML(deptTuple[0], encoding="gbk")currDeptListSoup = BeautifulSoup(currDeptListHTML, "html.parser")# 获取部门信息deptName = deptTuple[1]deptID = self.getDeptID(deptName, 1, city_id)# 获取行政类别categoryName_as = currDeptListSoup.find("div", attrs={"class": "depart_title"}).find_all("a")# 分行政类型请求for categoryName_a in categoryName_as:categoryName = categoryName_a.textcategoryID = self.getCategoryID(categoryName)categoryNameURL = url + categoryName_a["href"].replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "")categoryName_ = categoryNameURL[categoryNameURL.index("item=") + 5:].encode("gb2312")categoryNameURL = categoryNameURL[:categoryNameURL.index("item=")] + urllib.parse.urlencode({"item": categoryName_})currListHTML = self.getHTML(categoryNameURL)currListSoup = BeautifulSoup(currListHTML, "html.parser")# 获取总页数totalPage = int(currListSoup.find("div", attrs={"class": "pages"}).find("font").text)if totalPage != 0:# 分页请求for page in range(totalPage):currURL = categoryNameURL + "&setpage=1&setid=" + str(page * 20)currHTML = self.getHTML(currURL)currSoup = BeautifulSoup(currHTML, "html.parser")aimList = currSoup.find("table", attrs={"class": 
"infoList"}).find_all("a")# 获取当前页事项列表for detail in aimList:detailURL = url + detail["href"]detailHTML = self.getHTML(detailURL, encoding="gbk").lower()detailSoup = BeautifulSoup(detailHTML, "html.parser")detail_trs = detailSoup.find("div", attrs={"class": "detail"}).find_all("tr")title = detail_trs[1].find_all("td")[1].text.replace(" ", "").replace(" ", "").replace("\t", "").replace("\n", "").replace("\r", "").replace("\u3000", "")simple = {}simple['pid'] = areaIDList["pid"]simple['city_id'] = areaIDList["city_id"]simple['area_id'] = areaIDList["area_id"]simple['town_id'] = areaIDList["town_id"]simple['categoryID'] = categoryIDsimple['categoryName'] = categoryNamesimple['deptID'] = deptIDsimple['deptName'] = deptNamesimple['title'] = title# 设定依据sdyj = ""try:sdyj = {"title": "设定依据", "th": [], "list": [detail_trs[4].find_all("td")[1].text.replace(" ", "").replace("\t", "").replace("\n", "").replace("\r", "").replace("\u3000", "")[1: -1]]}except Exception as e:sdyj = {"title": "设定依据", "th": [], "list": []}# 办理基本流程bllc = ""try:bllc = {"title": "办理基本流程", "th": [], "list": [detail_trs[5].find_all("td")[1].text.replace(" ", "").replace("\t", "").replace("\n", "").replace("\r", "")]}except Exception as e:bllc = {"title": "办理基本流程", "th": [], "list": []}# 备注bz = ""try:bz = {"title": "备注", "th": [], "list": [detail_trs[6].find_all("td")[1].text.replace(" ", "").replace("\t", "").replace("\n", "").replace("\r", "")]}except Exception as e:bz = {"title": "备注", "th": [], "list": []}extends = []extends.append(sdyj)extends.append(bllc)extends.append(bz)detail = {"title": title,"extends": extends}detail = str(detail).replace("'", "\"")self.pipToMysql(detail, simple, detailURL)#################################################              阿拉善盟部分                   #################################################def getALSMAreaList(self):'''获取区域列表:return:'''siteURL = "http://alsmzwfwzx.gov.cn/portal/default/js/site/site.js"siteHTML = self.getHTML(siteURL)site_req = 
re.compile(r'({4:{.*?}})', re.S)siteList = json.loads(re.findall(site_req, siteHTML)[0].replace(":{", "\":{").replace("},", "},\"").replace("{4", "{\"4"))areaList = []for site in siteList:areaList.append((str(siteList[site]["oid"]), siteList[site]["siteName"]))return areaListdef getALSMDetail(self, url, areaType, areaID, pid):'''获取详细信息:param url::param areaType::param areaID::param pid::return:'''try:detailHTML = self.getHTML(url)detailSoup = BeautifulSoup(detailHTML, "html.parser")# 获取标题title = detailSoup.find("title").textdetail_trs = detailSoup.find("div", attrs={"class": "table_box"}).find_all("tr")# 获取部门信息deptName = detail_trs[5].find("td", attrs={"class": "td_width"}).textdeptID = self.getDeptID(deptName, areaType, areaID)# 获取行政类型信息categoryName = detail_trs[0].find("td", attrs={"class": "td_width"}).textcategoryID = self.getCategoryID(categoryName)# 获取区域id列表areaIDList = self.getIDList(areaType, areaID)#####################    列表信息    ####################simple = {}simple['pid'] = pidsimple['city_id'] = areaIDList["city_id"]simple['area_id'] = areaIDList["area_id"]simple['town_id'] = areaIDList["town_id"]simple['categoryID'] = categoryIDsimple['categoryName'] = categoryNamesimple['deptID'] = deptIDsimple['deptName'] = deptNamesimple['title'] = title#####################    详细信息    ####################divs = detailSoup.find_all("div", attrs={"class": "tab_list"})# 受理条件sltj = ""try:sltj = {"title": "受理条件", "th": [],"list": [detailSoup.find("div", attrs={"class": "tab_list_text tab_list"}).find('p').text]}except Exception as e:sltj = {"title": "受理条件", "th": [], "list": []}# 办理材料blcl = ""try:blcl_trs = divs[1].find_all("tr")[1:]blcl_list = []for blcl_tr in blcl_trs:blcl_tds = blcl_tr.find_all("td")emptytable = "无"if blcl_tds[5].find("a"):emptytable = blcl_tds[5].find("a")["href"]eleTable = "无"if blcl_tds[6].find("a"):eleTable = blcl_tds[6].find("a")["href"]blcl_meta = {"name": blcl_tds[0].text.replace("\xa0", ""),"type": blcl_tds[1].text.replace(" ", 
"").replace("\r", "").replace("\n", "").replace("\t", ""),"basic": blcl_tds[2].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"source": blcl_tds[3].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"standard": blcl_tds[4].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),"emptytable": emptytable,"eleTable": eleTable,"know": blcl_tds[7].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", ""),}blcl_list.append(blcl_meta)blcl = {"title": "办理材料", "th": ["材料名称", "材料类型", "材料规格", "来源渠道", "受理标准", "材料样表", "电子表格", "填报须知"],"list": blcl_list}except Exception as e:blcl = {"title": "办理材料", "th": ["材料名称", "材料类型", "材料规格", "来源渠道", "受理标准", "材料样表", "电子表格", "填报须知"],"list": []}# 办理流程图lct = ""try:lct = {"title": "办理流程图", "th": [], "list": [divs[2].find("img")["src"]]}except Exception as e:lct = {"title": "办理流程图", "th": [], "list": []}# 设定依据sdyj = ""try:sdyj = {"title": "设定依据", "th": [], "list": [divs[3].text.replace("\u3000", "").replace("\r", "").replace("\n", "").replace("\t", "")]}except Exception as e:sdyj = {"title": "设定依据", "th": [], "list": []}# 收费依据和标准sfyjbz = ""try:sfyjbz = {"title": "收费依据和标准", "th": [],"list": [divs[4].text.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "")]}except Exception as e:sfyjbz = {"title": "收费依据和标准", "th": [], "list": []}# 办理环节blhj = ""try:blhj = {"title": "办理环节", "th": [], "list": [divs[6].find("img")["src"]]}except Exception as e:blhj = {"title": "办理环节", "th": [], "list": []}extends = []extends.append(sltj)extends.append(blcl)extends.append(lct)extends.append(sdyj)extends.append(sfyjbz)extends.append(blhj)detail = {"title": title,"extends": extends}detail = str(detail).replace("'", "\"")self.pipToMysql(detail, simple, url)except Exception as e:passdef alsmParser(self, url, pid):'''阿拉善盟区域解析:param url::param pid::return:'''# 获取城市idself.cursor.execute("select * from city where city_name = '%s' and pid = %d" % ('阿拉善盟', pid))city_id = 
self.cursor.fetchall()[0][0]# 获取区域数据siteList = self.getALSMAreaList()# 分区域请求for site in siteList:areaType = 0areaID = 0areaName = site[1]if areaName == "阿拉善盟":areaType = 1areaID = city_idelse:areaType = 2self.cursor.execute("select * from area where area_name = '%s' and pid = %d" % (areaName, pid))areaID = self.cursor.fetchall()[0][0]# 获取该区域的部门列表indexHTML = self.getHTML("http://alsmzwfwzx.gov.cn/portal/sunshine/list.shtml?siteId=" + str(site[0]))indexSoup = BeautifulSoup(indexHTML, "html.parser")dept_as = indexSoup.find("div", attrs={"class": "list_category list_partment clearfix"}).find_all("a")[1:]deptTupleList = []for dept_a in dept_as:endIndex = dept_a.text.index("(")deptTupleList.append((None, dept_a.text[:endIndex]))deptList = self.getDeptList(deptTupleList)# 更新部门列表至该区域self.updateDeptList(areaType, areaID, deptList)# 获取总页数totalPage = json.loads(self.getHTML("http://alsmzwfwzx.gov.cn/portal/sunshine/query.shtml?random=0.5702250487302503&siteId=" + site[0] + "&sxlx=&directoryName=&pageNumber=1"))["maxPageNumber"]# 分页请求for page in range(1, totalPage + 1):currListURL = "http://alsmzwfwzx.gov.cn/portal/sunshine/query.shtml?random=0.5702250487302503&siteId=" + \site[0] + "&sxlx=&directoryName=&pageNumber=" + str(page)currList = json.loads(self.getHTML(currListURL))["datas"]for currListMeta in currList:if currListMeta["sxDirectoryChildList"]:for sxDirectoryChildList in currListMeta["sxDirectoryChildList"]:for serviceList in sxDirectoryChildList["serviceList"]:detailURL = "http://alsmzwfwzx.gov.cn/portal/bszn/view.shtml?serviceOid=" + serviceList["serviceOid"]self.getALSMDetail(detailURL, areaType, areaID, pid)else:for serviceList in currListMeta["serviceList"]:detailURL = "http://alsmzwfwzx.gov.cn/portal/bszn/view.shtml?serviceOid=" + serviceList["serviceOid"]self.getALSMDetail(detailURL, areaType, areaID, pid)def main(self):# 获取省份idself.cursor.execute("select * from province where province_name = '内蒙古自治区'")pid = self.cursor.fetchall()[0][0]# 获取区域列表# indexHTML = 
self.getHTML(self.indexURL)# areaList = self.getAreaList(indexHTML)# 区域存储# self.pipLineArea(areaList, pid)# 内蒙区域解析nmURL = "http://zwfw.nmg.gov.cn/zwfw/sxcx/itemList/gr_index.do?webId=1&zt=402881e55f941b97015f941d90380000&deptid="# self.nmParser(nmURL)# 呼和浩特市以及下级区域解析【该区域有ip频率限制，需要用到代理池】hsURL = "http://zwfw.huhhot.gov.cn/hs/public/index"# self.hsParser(hsURL, pid)# 包头市以及下级区域解析(无区站)btURL = "http://www.baotou.gov.cn/xxgk/qzqd.htm"# self.btParser(btURL, pid)# 呼伦贝尔市以及下级区域解析(无区站)hlbeURL = "http://www.hlbe.gov.cn/opennessTerms/branch/?type_terms=qzqd"# self.hlbeParser(hlbeURL, pid)# 兴安盟以及下级区域解析(站点无清单数据)xamURL = "http://www.xam.gov.cn/xam/_300473/_300600/bm71/mwtj61/index.html"# 通辽市以及下级区域解析tlURL = "http://zwfw.tongliao.gov.cn/mhwz/portal/bszn/gr/list.shtml?siteId=1"# self.tlParser(tlURL, pid)# 赤峰市以及下级区域解析(仅仅可以爬取区域信息，以及赤峰市和一个区的数据)cfURL = "http://zwfw.chifeng.gov.cn/grbsIndex.action?districtId=150400"# self.cfParser(cfURL, pid)# 锡林郭勒盟以及下级区域解析xlglmURL = "http://zwfw.xlgl.gov.cn/portal/sunshine/list.shtml?siteId=1"# self.xlglmParser(xlglmURL, pid)# 乌兰察布市以及下级区域解析wlcbURL = "http://zwfw.wulanchabu.gov.cn/personal.html?userType=4154"# self.wlcbParser(wlcbURL, pid)# 鄂尔多斯市以及下级区域解析(表格数据，暂不分析)eedsURL = "http://zwfw.ordos.gov.cn/departmentListTemporary.action?districtId=8a96998a48f7fb180148f90067d60105"# 巴彦淖尔市以及下级区域解析bycURL = "http://zwzx.bynr.gov.cn/portal/sunshine/list.shtml?siteId=1"# self.bycParser(bycURL, pid)# 乌海市以及下级区域解析（只有少量的市级数据）whURL = "http://218.21.240.34/xzqlgk/"# self.whParser(whURL, pid)# 阿拉善盟以及下级区域解析alsmURL = "http://alsmzwfwzx.gov.cn/portal/sunshine/list.shtml?siteId=4"# self.alsmParser(alsmURL, pid)# 满洲里市以及下级区域解析（无数据）mzlURL = "http://mzlxzsp.gov.cn/icity/public/index"# 二连浩特以及下级市区域解析(很少数据)elhtURL = "http://zwfw.elht.gov.cn:8088/zwfw/ygzw"if __name__ == "__main__":nm = NeiMeng()nm.main()