python 正则规则测试 GUI PyQt5
2019-09-07 python 1603
import os
import re
import sys
import time

import requests
from PyQt5.QtCore import QThread, pyqtSignal, QObject, QDateTime, Qt
from PyQt5.QtGui import QStandardItemModel, QStandardItem
from PyQt5.QtWidgets import QMainWindow, QWidget, QApplication, QPushButton, QFileDialog, QLineEdit, QHBoxLayout, \
    QVBoxLayout, QTextEdit, QDesktopWidget, QLabel, QDateTimeEdit, QCheckBox, QTableView, QHeaderView, \
    QAbstractItemView, QMessageBox


class CombWidget(QObject):
    """Helper that packs widgets into a box layout wrapped in a plain QWidget."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def _wrap(layout):
        """Return a new QWidget that owns *layout*."""
        container = QWidget()
        container.setLayout(layout)
        return container

    def combHBox(self, lst: list):
        """Lay out ``(widget, stretch)`` pairs horizontally and return the container."""
        layout = QHBoxLayout()
        for widget, stretch in lst:
            layout.addWidget(widget, stretch)
        return self._wrap(layout)

    def combHBoxAlign(self, lst: list):
        """Lay out ``(widget, stretch, alignment)`` triples horizontally and return the container."""
        layout = QHBoxLayout()
        for widget, stretch, align in lst:
            layout.addWidget(widget, stretch, align)
        return self._wrap(layout)

    def combVBox(self, lst: list):
        """Lay out ``(widget, stretch)`` pairs vertically and return the container."""
        layout = QVBoxLayout()
        for widget, stretch in lst:
            layout.addWidget(widget, stretch)
        return self._wrap(layout)


# Number of consecutive failed request attempts; kept as a module global
# because it is part of the original module's visible state.
error_index = 0


def get_content(url, charset="utf-8", timeout=30):
    """Download *url* and return its body decoded with *charset*.

    A request that raises is retried after a 2 second pause, up to four
    attempts in total. Every attempt uses the caller's *charset* and
    *timeout* (the original retry silently fell back to the defaults).

    Returns:
        The decoded body on HTTP 200; an empty string on any other status
        or when all attempts failed. The result is always a ``str`` so it
        can be emitted through a ``pyqtSignal(str)``.
    """
    global error_index
    headers = {
        "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:36.0) Gecko/20100101 Firefox/36.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Encoding": "deflate",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    while error_index <= 3:
        try:
            r = requests.get(url, timeout=timeout, headers=headers)
            content = r.content.decode(charset) if r.status_code == 200 else ""
        except Exception as e:
            # Broad on purpose: this runs inside a worker thread, where an
            # escaping exception would take the whole application down.
            print("Error:", str(e))
            print("2秒后重新链接。。。 error_index: %s" % error_index)
            time.sleep(2)
            error_index += 1
            continue
        # The server answered, so the failure streak is over.
        error_index = 0
        if r.status_code != 200:
            print("Error:", r.status_code)
        return content
    print("%d次错误退出" % error_index)
    # Reset so the next fetch gets a fresh set of attempts instead of
    # giving up immediately forever.
    error_index = 0
    return ""


class WorkThread(QThread):
    """Background thread that fetches ``url`` and emits the page text."""

    # Emitted with the downloaded text ("" when the download failed).
    triggerHtml = pyqtSignal(str)
    # Address to fetch; assigned by the window before start() is called.
    url = ""

    def run(self):
        print("url:%s" % self.url)
        content = get_content(self.url, charset="utf-8")
        # The signal is declared as str, so never hand it None.
        self.triggerHtml.emit(content or "")


class Testpattern(QMainWindow):
    """Main window for trying regular expressions against a fetched web page."""

    # Named groups that patDetail reports, in output order.
    DETAIL_GROUPS = ("content", "title", "img", "url")
    SEPARATOR = "\n---------------------------------------------------------\n"

    def __init__(self):
        super().__init__()
        self.initUI()

    def _insert_button(self, label, editor_name, snippet):
        """Create a button that inserts *snippet* into the editor stored in attribute *editor_name*.

        The editor is looked up when the button is clicked, so the button
        may be created before the editor itself exists.
        """
        button = QPushButton(label)
        button.clicked.connect(
            lambda _checked=False: getattr(self, editor_name).insertPlainText(snippet)
        )
        return button

    def initUI(self):
        """Build all widgets, lay them out and wire up the signals."""
        # NOTE: this attribute shadows QMainWindow.statusBar(); the name is
        # kept so existing references to `self.statusBar` keep working.
        self.statusBar = self.statusBar()
        self.setStatusBar(self.statusBar)
        self.statusBar.showMessage("初始化 test pattern")
        v = QVBoxLayout()
        comb = CombWidget()

        testcontent_btn = QPushButton("匹配")
        testcontent_btn.clicked.connect(self.patContent)
        test_btn = QPushButton("匹配")
        test_btn.clicked.connect(self.patDetail)
        gethtml_btn = QPushButton("获取网页内容")
        gethtml_btn.clicked.connect(self.getHtml)

        # Shortcut button for the content rule editor.
        contentp_btn = self._insert_button("content", "contentpattern_text", '(?P<content>.+?)')
        patbtns = comb.combVBox([(contentp_btn, 1), ])

        # Shortcut buttons that insert named groups into the detail rule editor.
        dcontentp_btn = self._insert_button("content", "pattern_text", '(?P<content>.+?)')
        dtitlep_btn = self._insert_button("title", "pattern_text", '(?P<title>.{1,100}?)')
        dimgp_btn = self._insert_button("img", "pattern_text", '(?P<img>[^>]+?)')
        durlp_btn = self._insert_button("url", "pattern_text", '(?P<url>[^>]+?)')
        dpatbtns = comb.combVBox([(dcontentp_btn, 1), (dtitlep_btn, 1), (dimgp_btn, 1), (durlp_btn, 1), ])

        # Shortcut buttons for common regex fragments; raw strings avoid the
        # invalid "\s" escape warning while producing the same text.
        ddiv_btn = self._insert_button("[^>]+?", "pattern_text", '[^>]+?')
        dcontent_btn = self._insert_button(".+", "pattern_text", '.+')
        dcontent1_btn = self._insert_button(".*", "pattern_text", '.*')
        dspace_btn = self._insert_button(r"\s+", "pattern_text", r'\s+')
        dspace1_btn = self._insert_button(r"\s*", "pattern_text", r'\s*')
        dspacebtns = comb.combVBox(
            [(ddiv_btn, 1), (dcontent_btn, 1), (dcontent1_btn, 1), (dspace_btn, 1), (dspace1_btn, 1), ])

        # Example detail rule:
        # <a href="(?P<url>[^>]+?)"><img src="(?P<img>[^>]+?)" alt="(?P<title>.{1,100}?)">
        self.url_txt = QLineEdit("http://www.jiuaitu.com/learn/")
        # NOTE(review): setFocus is documented as taking no argument or a
        # Qt.FocusReason; the bool arguments here and below are kept exactly
        # as in the original - confirm the intent.
        self.url_txt.setFocus(True)
        self.url_txt.setPlaceholderText("请输入网址")
        self.ret_text = QTextEdit("")
        self.ret_text.setPlaceholderText("网页内容")

        self.contentpattern_text = QTextEdit()
        self.contentpattern_text.setPlainText(r'<article>(?P<content>.+?)</article>')
        self.contentpattern_text.setPlaceholderText("内容规则")
        self.contentpattern_text.setFocus(False)
        self.contenthtml_text = QTextEdit("")
        self.contenthtml_text.setPlaceholderText("匹配内容")

        self.pattern_text = QTextEdit("")
        self.pattern_text.setPlainText('<img src="(?P<img>[^>]+?)" alt="(?P<title>.{1,100}?)">')
        self.pattern_text.setPlaceholderText("详细规则")
        self.html_text = QTextEdit("")
        self.html_text.setPlaceholderText("匹配内容")

        v.addWidget(comb.combHBox([(self.url_txt, 12), (gethtml_btn, 2), ]), 1)
        v.addWidget(comb.combHBox([(self.ret_text, 12), ]), 3)
        v.addWidget(
            comb.combHBoxAlign([(self.contentpattern_text, 12, Qt.AlignVCenter), (patbtns, 1, Qt.AlignTop),
                                (testcontent_btn, 2, Qt.AlignTop), ]), 1)
        v.addWidget(comb.combHBox([(self.contenthtml_text, 12), ]), 5)
        v.addWidget(comb.combHBoxAlign(
            [(self.pattern_text, 12, Qt.AlignVCenter), (dpatbtns, 1, Qt.AlignTop), (dspacebtns, 1, Qt.AlignTop),
             (test_btn, 2, Qt.AlignTop), ]), 1)
        v.addWidget(comb.combHBox([(self.html_text, 12), ]), 5)

        vwg = QWidget()
        vwg.setLayout(v)
        self.exec_time = time.time()
        self.setCentralWidget(vwg)

        self.workThread = WorkThread()
        self.workThread.triggerHtml.connect(self.showResult)

        self.setGeometry(300, 300, 800, 800)
        self.center()
        self.setWindowTitle("测试规则")

    def patContent(self):
        """Apply the content rule to the page text and show its ``content`` group.

        Example rule: <div class="blogs-list">(?P<content>.+)</div>
        """
        print("匹配内容")
        try:
            pat = self.contentpattern_text.toPlainText()
            content = self.ret_text.toPlainText()
            print("pattren:%s" % pat)
            m = re.search(pat, content, re.S | re.I)
            if m is None:
                # Clear the previous result so a stale match is not mistaken
                # for the outcome of the current rule.
                self.contenthtml_text.setPlainText("")
                self.statusBar.showMessage("未匹配到内容")
                return
            self.contenthtml_text.setPlainText(m.group("content"))
        except Exception as e:
            # Broad on purpose: this is a GUI slot and the rule is free-form
            # user input; show the problem instead of crashing.
            self.contenthtml_text.setPlainText(str(e))
            print(str(e))

    def patDetail(self):
        """Apply the detail rule repeatedly and list the known named groups of every match.

        The rule is run against the extracted content, or against the whole
        page text when no content has been extracted yet.
        """
        print("匹配详细")
        try:
            pat = self.pattern_text.toPlainText()
            if not pat:
                # An empty pattern matches at every position of the text and
                # would only produce an enormous list of separators.
                self.html_text.setPlainText("")
                self.statusBar.showMessage("详细规则不能为空")
                return
            compiled = re.compile(pat, re.S | re.I)
            # Report only the known groups that the rule really defines.
            pat_names = [name for name in self.DETAIL_GROUPS if name in compiled.groupindex]
            print("pattren:%s" % pat_names)
            content = self.contenthtml_text.toPlainText()
            print("pattren:%s" % pat)
            if content == "":
                content = self.ret_text.toPlainText()
            chunks = []
            for match in compiled.finditer(content):
                for name in pat_names:
                    chunks.append("<%s>:%s \n" % (name, match.group(name)))
                chunks.append(self.SEPARATOR)
            self.html_text.setPlainText("".join(chunks))
        except Exception as e:
            # Broad on purpose: this is a GUI slot and the rule is free-form
            # user input; show the problem instead of crashing.
            self.html_text.setPlainText(str(e))
            print(str(e))

    def showResult(self, text):
        """Slot for WorkThread.triggerHtml: show the page text and the elapsed time."""
        elapsed = time.time() - self.exec_time
        if text:
            self.statusBar.showMessage("请求成功 耗时 %.2f 秒" % elapsed)
        else:
            # get_content returns "" for a non-200 status or exhausted retries
            # (an empty 200 body looks the same from here).
            self.statusBar.showMessage("请求失败 耗时 %.2f 秒" % elapsed)
        self.ret_text.setPlainText(text)

    def getHtml(self):
        """Validate the URL field and start the background download."""
        # Use the stripped value for the request too, not only for validation.
        self.url = self.url_txt.text().strip()
        if self.url == "":
            QMessageBox.warning(self, "提示", "URL网址不能为空", QMessageBox.Yes | QMessageBox.No)
            return
        if self.workThread.isRunning():
            # start() on a running QThread does nothing, so do not overwrite
            # the URL or the timer of the request that is still in flight.
            self.statusBar.showMessage("请求进行中,请稍候")
            return
        self.statusBar.showMessage("请求URL:%s" % self.url)
        self.workThread.url = self.url
        self.exec_time = time.time()
        self.workThread.start()

    def center(self):
        """Move the window to the center of the available desktop area."""
        qr = self.frameGeometry()
        cp = QDesktopWidget().availableGeometry().center()
        qr.moveCenter(cp)
        self.move(qr.topLeft())


if __name__ == "__main__":
    app = QApplication(sys.argv)
    ex = Testpattern()
    ex.show()
    sys.exit(app.exec_())
很赞哦! (0)
相关文章
文章评论
-
-
-
0条评论