1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
| import random import re from threading import Thread import time
import numpy import requests from selenium import webdriver from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.by import By
""" 任何疑问,请加qq群咨询:427847187 我看到了一定会耐心解答的!!! 代码前身可能更容易理解一点:https://github.com/Zemelee/wjx/blob/master/wjx.py --- 使用教程: https://www.bilibili.com/video/BV1qc411T7CG/ 代码使用规则: 你需要提前安装python环境,且已具备上述的所有安装包(selenium版本号需要和webdriver匹配) 还需要下载好chrome的webDriver自动化工具,并将其放在python安装目录下,以便和selenium配套使用,准备工作做好即可直接运行 按要求填写概率值并替换成自己的问卷链接即可运行。 虽然但是!!!即使正确填写概率值,不保证100%成功运行,因为代码再强大也强大不过问卷星的灵活性,别问我怎么知道的,都是泪 如果有疑问欢迎打扰我,如果不会python但确有需要也可以找我帮你刷嗷~(2023.05.04) """
""" 获取代理ip,这里要使用到一个叫“太阳http代理”的第三方服务https://www.tyhttp.com/getapi/ 先将自己电脑的公网ip添加到网站的白名单中,然后获取ip链接即可 注意!!!获取到的ip可能具有多种格式,但是脚本是按照type=3、port=1、lb=2、pb=4格式来提取获取到的ip的 如果格式与上述不一致可能提取不到ip,所以按照上述格式获取ip哦,关于代理ip的更多使用方法参考官网 """ ips = [] api = f"http://http.tiqu.alibabaapi.com/getip?num=10&type=3&pack={'你的值'}&port=1&lb=2&pb=4®ions=" ip_and_port = requests.get(api).text pattern = r"(\d+\.\d+\.\d+\.\d+):(\d+)" matches = re.findall(pattern, ip_and_port) for match in matches: ip = match[0] port = match[1] dist = {"ip": ip, "port": port} ips.append(dist)
print("代理ip:", ips)
# Questionnaire that the script fills in.
url = 'https://www.wjx.cn/vm/OM6GYNV.aspx#'

# Single-choice weights. "1" means the first question; a weight of 0 means the
# option is never picked, [30, 70] means a 3:7 ratio, and -1 means "pick at
# random".
# In the sample questionnaire the first question has three options, so the
# entry for "1" must also hold three values, otherwise an error is raised.
# The question-number keys do not actually matter; they are only there to make
# the weights easier to keep track of. Each dict is converted to a plain list
# before it is used.
# What matters is that the n-th entry of single_prob belongs to the n-th
# single-choice question. In the sample questionnaire question 5 is a slider
# question, yet single_prob has a "5" entry: that "5" is the 5th single-choice
# question, i.e. question 6 of the questionnaire.
# The "5" key could be renamed to anything (not recommended). The sample
# questionnaire has only 5 single-choice questions, so the 6th entry is never
# used: you may supply more entries than needed, but never fewer. The same
# holds for the other question types.
single_prob = {"1": [1, 1, 0], "2": -1, "3": -1, "4": -1, "5": -1, "6": [1, 0], }
# Drop-down weights, one entry per drop-down question.
droplist_prob = {"1": [1, 1, 1]}
# Multiple-choice weights. These are NOT normalised here: multiple() treats a
# weight of 100 as "always tick" and normalises the remaining weights itself.
multiple_prob = {"9": [100, 2, 1, 1]}
# How many options multiple() draws at random from the weighted options.
multiple_opts = {"9": 1, }
# Matrix weights, one entry per matrix row (rows of all matrix questions are
# consumed consecutively by matrix()).
matrix_prob = {"1": [1, 0, 0, 0, 0], "2": -1, "3": [1, 0, 0, 0, 0], "4": [1, 0, 0, 0, 0], "5": [1, 0, 0, 0, 0],
               "6": [1, 0, 0, 0, 0]}
# Scale weights, one entry per scale question.
scale_prob = {"7": [0, 2, 3, 4, 1], "12": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]}
# Candidate answers for free-text questions and the weight of each candidate.
texts = {"8": ["内容1", "内容2", " 内容3"], }
texts_prob = {"8": [1, 1, 1]}


def normalize_weights(weights, label=""):
    """Return *weights* rescaled so that they sum to 1.

    Settings that are not lists (for example the ``-1`` "random" marker) are
    returned unchanged.

    :param weights: a list of non-negative numbers, or a non-list marker.
    :param label: text identifying the setting, used only in the error message.
    :raises ValueError: if the list is empty, sums to zero or contains a
        negative value, since no probability distribution can be built from it.
    """
    if not isinstance(weights, list):
        return weights
    total = sum(weights)
    if total <= 0 or any(weight < 0 for weight in weights):
        raise ValueError(
            f"{label or 'weights'} must be non-negative and sum to a positive value, got {weights!r}")
    return [weight / total for weight in weights]


# multiple_prob is deliberately left out: multiple() needs the raw weights.
for name, prob in (
        ("single_prob", single_prob),
        ("matrix_prob", matrix_prob),
        ("droplist_prob", droplist_prob),
        ("scale_prob", scale_prob),
        ("texts_prob", texts_prob),
):
    for key in prob:
        prob[key] = normalize_weights(prob[key], f"{name}[{key!r}]")

# From here on only the order of the entries matters, so drop the keys.
single_prob = list(single_prob.values())
droplist_prob = list(droplist_prob.values())
multiple_prob = list(multiple_prob.values())
multiple_opts = list(multiple_opts.values())
matrix_prob = list(matrix_prob.values())
scale_prob = list(scale_prob.values())
texts_prob = list(texts_prob.values())
texts = list(texts.values())

print("单选题参数:", single_prob)
print("下拉框参数:", droplist_prob)
print("多选题参数:", multiple_prob)
print("矩阵题参数:", matrix_prob)
print("量表题参数:", scale_prob)
def detect(driver):
    """Count the numbered questions on every page of the questionnaire.

    The number of pages is the number of ``fieldset`` elements directly under
    ``#divQuestion``. For each page ``fieldset{n}`` the direct ``div`` children
    whose ``topic`` attribute is a digit string are counted.

    :param driver: Selenium WebDriver with the questionnaire loaded.
    :return: list with one question count per page, in page order.
    """
    page_num = len(driver.find_elements(By.XPATH, '//*[@id="divQuestion"]/fieldset'))
    q_list = []
    # Page 1 is always inspected, even if no fieldset was found above.
    for page in range(1, max(page_num, 1) + 1):
        items = driver.find_elements(By.XPATH, f'//*[@id="fieldset{page}"]/div')
        # A missing "topic" attribute (None) is treated as "not a question"
        # instead of raising AttributeError.
        q_list.append(sum(1 for item in items if (item.get_attribute("topic") or "").isdigit()))
    return q_list
def vacant(driver, current, index):
    """Type one of the configured answers into free-text question *current*.

    :param driver: Selenium WebDriver with the questionnaire loaded.
    :param current: question number; the input field is ``#q{current}``.
    :param index: position of this question's entry in ``texts`` and
        ``texts_prob``.
    """
    candidates = texts[index]
    weights = texts_prob[index]
    # Weighted draw of a position into `candidates`.
    picked = numpy.random.choice(a=numpy.arange(0, len(weights)), p=weights)
    field = driver.find_element(By.CSS_SELECTOR, f'#q{current}')
    field.send_keys(candidates[picked])
def single(driver, current, index):
    """Click one option of single-choice question *current*.

    :param driver: Selenium WebDriver with the questionnaire loaded.
    :param current: question number; the question container is ``#div{current}``.
    :param index: position of this question's entry in ``single_prob``; the
        entry is either ``-1`` (uniform random pick) or one weight per option.
    """
    option_count = len(driver.find_elements(By.XPATH, f'//*[@id="div{current}"]/div[2]/div'))
    weights = single_prob[index]
    # Option positions are 1-based because they feed a CSS :nth-child selector.
    if weights != -1:
        position = numpy.random.choice(a=numpy.arange(1, option_count + 1), p=weights)
    else:
        position = random.randint(1, option_count)
    selector = f'#div{current} > div.ui-controlgroup > div:nth-child({position})'
    driver.find_element(By.CSS_SELECTOR, selector).click()
def droplist(driver, current, index):
    """Open the drop-down of question *current* and click one of its entries.

    :param driver: Selenium WebDriver with the questionnaire loaded.
    :param current: question number used in the ``select2-q{current}-…`` ids.
    :param index: position of this question's entry in ``droplist_prob``; the
        entry is either ``-1`` (uniform random pick, consistent with the other
        question types) or one weight per selectable entry.
    """
    driver.find_element(By.CSS_SELECTOR, f"#select2-q{current}-container").click()
    time.sleep(0.5)  # presumably lets the result list render before it is read
    options = driver.find_elements(By.XPATH, f"//*[@id='select2-q{current}-results']/li")
    p = droplist_prob[index]
    # The first <li> is never chosen (presumably a placeholder entry), so the
    # draw covers 1 .. len(options) - 1.
    if p == -1:
        r = random.randint(1, len(options) - 1)
    else:
        r = numpy.random.choice(a=numpy.arange(1, len(options)), p=p)
    # XPath positions are 1-based, hence r + 1.
    driver.find_element(By.XPATH, f"//*[@id='select2-q{current}-results']/li[{r + 1}]").click()
def multiple(driver, current, index):
    """Tick options of multiple-choice question *current*.

    The entry ``multiple_prob[index]`` controls the behaviour:

    * ``0``  - tick nothing;
    * ``-1`` - tick one option chosen uniformly at random;
    * a list of weights - every option weighted ``100`` is always ticked, then
      ``multiple_opts[index]`` further options are drawn without replacement
      from the remaining non-zero weights.

    :param driver: Selenium WebDriver with the questionnaire loaded.
    :param current: question number; the question container is ``#div{current}``.
    :param index: position of this question's entry in ``multiple_prob`` and
        ``multiple_opts``.
    """

    def click_option(position):
        # `position` is 1-based because it feeds a CSS :nth-child selector.
        driver.find_element(By.CSS_SELECTOR,
                            f'#div{current} > div.ui-controlgroup > div:nth-child({position})').click()

    options = driver.find_elements(By.XPATH, f'//*[@id="div{current}"]/div[2]/div')
    probabilities = multiple_prob[index]
    if probabilities == 0:
        return
    if probabilities == -1:
        click_option(random.randint(1, len(options)))
        return

    # Work on a copy so the shared configuration is not modified.
    weights = list(probabilities)
    for position, weight in enumerate(weights, start=1):
        if weight == 100:
            click_option(position)
            weights[position - 1] = 0  # already ticked: exclude from the draw
    total = sum(weights)
    if total == 0:
        return
    normalized = [weight / total for weight in weights]
    # numpy refuses to draw more distinct items than there are non-zero
    # weights, so never ask for more than are available.
    available = sum(1 for weight in weights if weight != 0)
    draw_count = min(multiple_opts[index], available)
    selection_indices = numpy.random.choice(
        range(len(options)), size=draw_count, replace=False, p=normalized)
    for i in selection_indices:
        click_option(i + 1)
def matrix(driver, current, index):
    """Click one cell in every row of matrix question *current*.

    Rows are the ``tr`` elements of ``#divRefTab{current}`` that carry a
    ``rowindex`` attribute. Each row consumes one entry of ``matrix_prob``,
    starting at *index*.

    :param driver: Selenium WebDriver with the questionnaire loaded.
    :param current: question number used in the ``divRefTab``/``drv`` ids.
    :param index: position in ``matrix_prob`` of the first row's entry.
    :return: position in ``matrix_prob`` following the last entry consumed.
    """
    table_rows = driver.find_elements(By.XPATH, f'//*[@id="divRefTab{current}"]/tbody/tr')
    row_count = sum(1 for row in table_rows if row.get_attribute("rowindex") is not None)
    # The cell count of the first row is used for every row.
    cell_count = len(driver.find_elements(By.XPATH, f'//*[@id="drv{current}_1"]/td'))
    for row_no in range(1, row_count + 1):
        weights = matrix_prob[index]
        index += 1
        # Selectable cells start at the 2nd <td>; the 1st is skipped
        # (presumably the row label).
        if weights != -1:
            column = numpy.random.choice(a=numpy.arange(2, cell_count + 1), p=weights)
        else:
            column = random.randint(2, cell_count)
        selector = f'#drv{current}_{row_no} > td:nth-child({column})'
        driver.find_element(By.CSS_SELECTOR, selector).click()
    return index
def reorder(driver, current):
    """Answer ordering question *current* by clicking its items in random order.

    :param driver: Selenium WebDriver with the questionnaire loaded.
    :param current: question number; the items are ``#div{current} > ul > li``.
    """
    item_count = len(driver.find_elements(By.XPATH, f'//*[@id="div{current}"]/ul/li'))
    rank = 1
    while rank <= item_count:
        # Only positions rank..item_count are candidates; presumably the page
        # moves each clicked item to the front - TODO confirm.
        target = random.randint(rank, item_count)
        selector = f'#div{current} > ul > li:nth-child({target})'
        driver.find_element(By.CSS_SELECTOR, selector).click()
        time.sleep(0.4)
        rank += 1
def scale(driver, current, index):
    """Click one grade of scale question *current*.

    :param driver: Selenium WebDriver with the questionnaire loaded.
    :param current: question number; the question container is ``#div{current}``.
    :param index: position of this question's entry in ``scale_prob``; the
        entry is either ``-1`` (uniform random pick) or one weight per grade.
    """
    grade_count = len(driver.find_elements(By.XPATH, f'//*[@id="div{current}"]/div[2]/div/ul/li'))
    weights = scale_prob[index]
    # Grades are 1-based because they feed a CSS :nth-child selector.
    if weights != -1:
        grade = numpy.random.choice(a=numpy.arange(1, grade_count + 1), p=weights)
    else:
        grade = random.randint(1, grade_count)
    selector = f"#div{current} > div.scale-div > div > ul > li:nth-child({grade})"
    driver.find_element(By.CSS_SELECTOR, selector).click()
def brush(driver):
    """Answer every question of the loaded questionnaire, then submit it.

    The per-page question counts come from detect(). Each question's ``type``
    attribute selects the handler; a separate counter per question type tracks
    which entry of that type's configuration list is next.

    :param driver: Selenium WebDriver with the questionnaire loaded.
    """
    q_list = detect(driver)
    single_num = 0
    vacant_num = 0
    droplist_num = 0
    multiple_num = 0
    matrix_num = 0
    scale_num = 0
    current = 0  # question number across all pages
    for page_questions in q_list:
        for _ in range(page_questions):
            current += 1
            q_type = driver.find_element(By.CSS_SELECTOR, f'#div{current}').get_attribute("type")
            if q_type in ("1", "2"):
                vacant(driver, current, vacant_num)
                vacant_num += 1
            elif q_type == "3":
                single(driver, current, single_num)
                single_num += 1
            elif q_type == "4":
                multiple(driver, current, multiple_num)
                multiple_num += 1
            elif q_type == "5":
                scale(driver, current, scale_num)
                scale_num += 1
            elif q_type == "6":
                # matrix() consumes one entry per row and returns the next index.
                matrix_num = matrix(driver, current, matrix_num)
            elif q_type == "7":
                droplist(driver, current, droplist_num)
                droplist_num += 1
            elif q_type == "8":
                # Type 8 receives a random score between 1 and 100.
                score = random.randint(1, 100)
                driver.find_element(By.CSS_SELECTOR, f'#q{current}').send_keys(score)
            elif q_type == "11":
                reorder(driver, current)
            else:
                # Report the questionnaire-wide number, not the per-page index.
                print(f"第{current}题为不支持题型!")
        time.sleep(0.5)
        # After each page: go to the next page, or fall back to the
        # "ctlNext" button when "#divNext" cannot be clicked.
        try:
            driver.find_element(By.CSS_SELECTOR, '#divNext').click()
            time.sleep(0.5)
        except Exception:  # not a bare except: Ctrl+C must still stop the script
            driver.find_element(By.XPATH, '//*[@id="ctlNext"]').click()
    submit(driver)
def submit(driver):
    """Work through the optional verification steps shown after submitting.

    Each step is best effort: when its element is absent or cannot be used the
    step is skipped. Only ``Exception`` is suppressed, so ``KeyboardInterrupt``
    and ``SystemExit`` still propagate.

    :param driver: Selenium WebDriver on the questionnaire page.
    """
    time.sleep(1)
    # Step 1: click the link in the "layui-layer1" pop-up (presumably a
    # confirmation dialog), if it is shown.
    try:
        driver.find_element(By.XPATH, '//*[@id="layui-layer1"]/div[3]/a').click()
        time.sleep(1)
    except Exception:
        pass
    # Step 2: click the "SM_BTN_1" button (presumably a click-to-verify
    # widget), if it is shown.
    try:
        driver.find_element(By.XPATH, '//*[@id="SM_BTN_1"]').click()
        time.sleep(3)
    except Exception:
        pass
    # Step 3: when the slider prompt is displayed, drag it by its own width.
    try:
        slider = driver.find_element(By.XPATH, '//*[@id="nc_1__scale_text"]/span')
        if str(slider.text).startswith("请按住滑块"):
            width = slider.size.get('width')
            ActionChains(driver).drag_and_drop_by_offset(slider, width, 0).perform()
    except Exception:
        pass
def run(xx, yy):
    """Open a Chrome window and fill in the questionnaire in an endless loop.

    A submission is counted as successful when the browser URL differs from
    the URL right after loading the questionnaire. The global ``count`` is
    incremented and a progress line is printed for every success.

    :param xx: x coordinate of the browser window.
    :param yy: y coordinate of the browser window.
    """
    # NOTE(review): `count` is shared by all worker threads and `count += 1`
    # is not synchronised, so the printed total may occasionally be off.
    global count
    option = webdriver.ChromeOptions()
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    option.add_experimental_option('useAutomationExtension', False)
    if ips:
        # One random proxy is used for the whole lifetime of this browser.
        proxy = random.choice(ips)
        option.add_argument(f"--proxy-server={proxy['ip']}:{proxy['port']}")
    driver = webdriver.Chrome(options=option)
    try:
        driver.set_window_size(600, 400)
        driver.set_window_position(x=xx, y=yy)
        # Make navigator.webdriver read as undefined in every new document.
        driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument',
                               {'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'})
        while True:
            driver.get(url)
            url1 = driver.current_url
            brush(driver)
            time.sleep(4)
            url2 = driver.current_url
            if url1 != url2:
                count += 1
                print(f"已填写{count}份 - {time.strftime('%H:%M:%S', time.localtime(time.time()))}")
                driver.get(url)
            else:
                time.sleep(2)
    finally:
        # The loop only ends through an exception; close the browser instead
        # of leaving an orphaned Chrome/chromedriver process behind.
        driver.quit()
if __name__ == "__main__":
    # Number of completed submissions; run() increments it as a global.
    count = 0
    # One browser window per thread; each tuple is the window's (x, y) position.
    thread_1, thread_2 = (Thread(target=run, args=position) for position in ((50, 50), (650, 280)))
    thread_1.start()
    thread_2.start()
|