油猴插件之指定（招聘）数据获取

插件的作用是辅助我们精准且快速获取信息，提升效率。
使用选择器（jQuery 或原生 document.querySelector）获取指定数据并清洗，点击按钮后下载为 CSV 或上报存储到后台。

如何使用tampermonkey

看官方教程，推荐使用edge浏览器
https://www.tampermonkey.net/

通过jquery获取指定数据

招聘网站

在插件脚本 ==UserScript== 中配置目标网站

// @match        https://www.zhipin.com/job_detail/*
// @match        https://jobs.zhaopin.com/*
// @match        https://www.liepin.com/job/*
// @match        https://www.lagou.com/wn/jobs/*
// @match        https://*.58.com/*
// @match        https://jobs.51job.com/*

目标数据

// Target fields collected for every job posting.
const data = [
  "岗位名称",
  "薪资下限",
  "薪资上限",
  "薪资",
  "经验要求",
  "学历要求",
  "工作地点",
  "数据来源",
  "职位说明",
  "福利",
  "企业名称",
  "企业标签",
  "链接",
];

获取数据并清洗

// Site-specific extractors keyed by domain. Each one reads the raw fields of
// the job posting from the current page and returns them as a plain object.
// Missing elements produce undefined fields instead of throwing.
const FNS = {
  "lagou.com": () => {
    // The first three spans are read as city, experience and education; each
    // may carry a trailing " /" separator that is stripped here.
    const [city, experience, education] = Array.from(
      document.querySelectorAll(".job_request h3 span")
    )
      .map((span) => span.textContent)
      .map((s) => s.replace(" /", "").trim());
    return {
      url: location.href,
      source: "拉勾网",
      name: document.querySelector(".position-head-wrap-position-name")
        ?.textContent,
      salary: document.querySelector(".salary")?.textContent,
      // Whitespace-separated perks. The lookup is optional like every other
      // field, and padding whitespace must not produce empty tags.
      tags: document
        .querySelector(".job-advantage p")
        ?.textContent.trim()
        .split(/\s+/)
        .filter((s) => s),
      experience,
      education,
      info: document.querySelector(".job-detail")?.textContent,
      city,
      location: document
        .querySelector(".work_addr span")
        ?.textContent?.replace("- ", ""),
      company: document.querySelector(".job_company_content .fl-cn")
        ?.textContent,
      company_tags: Array.from(
        document.querySelectorAll(".c_feature li h4")
      ).map((li) => li.textContent),
    };
  },
  // ... extractors for the other sites are omitted here (see the complete script)
};

页面插入按钮

我们需要有选择的抓取数据，在页面中插入按钮

// Wrapper for the floating trigger button. The anchor's inline onclick posts
// the "pluginGetData" message to the window when it is clicked.
const downloadLink = document.createElement("div");
downloadLink.innerHTML = `<a onclick="window.postMessage('pluginGetData', '*')" style="display:block;width:300px;height:100px; line-height: 100px; position:fixed; top:10px;right:10px;z-index:999999; background-color:#c4261d; box-shadow: 1px 2px 3px #000; color:#fff; font-size: 28px; text-align:center; cursor: pointer;">》》获取数据《《</a>`;
// Place the wrapper before the first element of <body>; when <body> has no
// element children the reference node is empty and the wrapper is appended.
document.body.insertBefore(downloadLink, document.body.firstElementChild);

GM_* 函数运行在油猴沙盒中，页面里的 window（例如按钮的 onclick）无法直接调用它们，所以两者不能直接混用。点击按钮后通过 postMessage 发送消息，由沙盒中的脚本接收：

window.postMessage('pluginGetData', '*')

页面与沙盒通信

接收页面消息，原页面与沙盒通信

// Route every "message" event on the window to receiveMessage (bubbling phase).
window.addEventListener("message", receiveMessage);

/**
 * Handles "message" events and starts the data collection when the payload is
 * the "pluginGetData" command posted by the injected button.
 *
 * @param {MessageEvent} event - Message event; `event.data` may be any value
 *   (string, object, number, null), not only the command string.
 */
function receiveMessage(event) {
  console.log("receiveMessage", event);
  const { data } = event;
  // Only strings can carry the command. Calling includes() on an object,
  // number or null payload from another sender would throw a TypeError.
  if (typeof data === "string" && data.includes("pluginGetData")) {
    pluginGetData();
  }
}

清洗后的数据上报

/**
 * Uploads the cleaned job record with GM_xmlhttpRequest and reports the
 * outcome through alert().
 *
 * Relies on `newJobData` having been produced by the scraping/cleaning step,
 * which is omitted from this excerpt.
 */
function pluginGetData() {
  // Scraping and cleaning of the page data happens here (omitted).
  // Cleaned record:
  console.log(newJobData);

  // A response that mentions "objectId" is treated as a successful insert.
  const handleLoad = (xhr) => {
    const data = xhr.responseText;
    const prefix = data.includes("objectId")
      ? "操作成功！"
      : "操作失败，请重试！";
    alert(prefix + data);
  };

  const handleError = (err) => {
    alert("操作失败，请重试！" + err);
  };

  // Report the record to the backend.
  GM_xmlhttpRequest({
    url: "https://*/parse/classes/RecruitmentData",
    method: "POST",
    data: JSON.stringify(newJobData),
    headers: {
      "Content-Type": "application/json;charset=utf-8",
      "X-Parse-Application-Id": "*",
    },
    onload: handleLoad,
    onerror: handleError,
  });
}

其它

tips: 也可以直接在页面清洗数据，点击按钮后直接下载

// newJobData: the cleaned data.
// NOTE(review): it must already be CSV text at this point. Blob converts
// non-string parts to strings, so a plain object would be saved as
// "[object Object]".
let downloadLink = document.createElement("a");
// MIME type parameters are separated from the type by ";", not ",".
const blob = new Blob([newJobData], { type: "text/csv;charset=UTF-8" });
const uri = URL.createObjectURL(blob);
// Anchors take their target from href; they have no src attribute.
downloadLink.href = uri;
// Apply the fallback to the base name: (name + ".csv") is always truthy, so
// the original "temp.csv" fallback could never be used.
downloadLink.download = (name || "temp") + ".csv";

完整代码

// ==UserScript==
// @name         招聘数据
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  try to take over the world!
// @author       Csorz
// @match        https://www.zhipin.com/job_detail/*
// @match        https://jobs.zhaopin.com/*
// @match        https://www.liepin.com/job/*
// @match        https://www.lagou.com/wn/jobs/*
// @match        https://*.58.com/*
// @match        https://jobs.51job.com/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=liepin.com
// @grant        GM_xmlhttpRequest
// @grant unsafeWindow
// @require      https://cdn.bootcss.com/jquery/3.4.1/jquery.min.js
// ==/UserScript==

(function () {
  "use strict";
  /**
   * Scrapes the job posting on the current page, normalizes it and uploads it.
   *
   * Steps: pick the extractor for the current site from the last two labels of
   * location.hostname, read the raw fields from the DOM, split the salary text
   * into numeric bounds, then POST the flattened record with
   * GM_xmlhttpRequest. The outcome is reported through alert().
   *
   * Missing page elements yield undefined fields instead of throwing, and an
   * unsupported hostname is reported to the user instead of crashing.
   */
  const pluginGetData = () => {
    // Multiplier for each unit suffix a salary figure may carry.
    const SALARY_UNITS = {
      k: 1000,
      K: 1000,
      千: 1000,
      w: 10000,
      W: 10000,
      万: 10000,
    };
    // "<low><unit?> <separator> <high><unit?>", e.g. "15k-25k" or "1.5-2万".
    const SALARY_RANGE =
      /(?<low>[0-9.]+)(?<lowUnit>[kK千wW万]?)[^0-9.]+(?<high>[0-9.]+)(?<highUnit>[kK千wW万]?)/;

    /**
     * Splits a salary range string into numeric bounds.
     *
     * @param {string | null | undefined} salary - Raw salary text from the page.
     * @returns {{ low?: number, high?: number }} Bounds scaled by their unit;
     *   an empty object when the text is missing or contains no range.
     */
    function parseSalary(salary) {
      if (!salary) {
        return {};
      }
      const groups = SALARY_RANGE.exec(salary)?.groups;
      if (!groups) {
        return {};
      }
      const { low, lowUnit, high, highUnit } = groups;
      return {
        // A range such as "15-25k" carries the unit only on the upper bound,
        // so the lower bound borrows it.
        low: Number(low) * (SALARY_UNITS[lowUnit || highUnit] || 1),
        high: Number(high) * (SALARY_UNITS[highUnit] || 1),
      };
    }

    // Site-specific extractors keyed by the last two hostname labels. Each
    // returns the raw fields of the posting as a plain object.
    const FNS = {
      "51job.com": () => {
        // ".ltype" is split on "|" into city, experience and education.
        // Falls back to an empty list when the element is missing.
        const [city, experience, education] =
          document
            .querySelector(".ltype")
            ?.textContent.split("|")
            .map((s) => s.trim()) ?? [];
        return {
          url: location.href,
          source: "前程无忧",
          name: document.querySelector("h1")?.textContent,
          salary: document.querySelector("strong")?.textContent,
          tags: Array.from(document.querySelectorAll(".jtag span")).map(
            (span) => span.textContent
          ),
          experience,
          education,
          info: document.querySelector(".job_msg")?.textContent,
          city,
          location: Array.from(document.querySelectorAll(".bmsg .fp"))
            .map((p) => p.textContent)
            .filter((s) => s.startsWith("上班地址："))
            .map((s) => s.replace("上班地址：", ""))[0],
          company: document.querySelector(".com_name")?.textContent,
          company_tags: Array.from(document.querySelectorAll(".com_tag p"))
            .map((span) => span.title)
            .filter((s) => s),
        };
      },
      "58.com": () => {
        // The first span is skipped; the next two are read as education and
        // experience.
        const [, education, experience] = Array.from(
          document.querySelectorAll(".pos_base_condition span")
        ).map((span) => span.textContent);
        return {
          url: location.href,
          source: "58同城",
          name: document.querySelector(".pos_name")?.textContent,
          salary: document.querySelector(".pos_salary")?.textContent,
          tags: Array.from(document.querySelectorAll(".pos_welfare span")).map(
            (span) => span.textContent
          ),
          experience,
          education,
          info: document.querySelector(".posDes")?.textContent,
          city: document.querySelector(".pos_address .pos_area_item")
            ?.textContent,
          // NOTE(review): "pos_address" has no leading "." here, so it is
          // matched as a tag name; probably meant as a class - confirm against
          // the live page before changing the selector.
          location: document.querySelector(".pos_area_span pos_address > span")
            ?.textContent,
          company: document.querySelector(".comp_baseInfo_title a")
            ?.textContent,
          company_tags: Array.from(
            document.querySelectorAll(".company_baseInfo > p")
          )
            .map((p) => p.textContent)
            .filter((s) => s),
        };
      },
      "lagou.com": () => {
        // Each span may carry a trailing " /" separator that is stripped here.
        const [city, experience, education] = Array.from(
          document.querySelectorAll(".job_request h3 span")
        )
          .map((span) => span.textContent)
          .map((s) => s.replace(" /", "").trim());
        return {
          url: location.href,
          source: "拉勾网",
          name: document.querySelector(".position-head-wrap-position-name")
            ?.textContent,
          salary: document.querySelector(".salary")?.textContent,
          // Whitespace-separated perks. The lookup is optional like every
          // other field, and padding whitespace must not produce empty tags.
          tags: document
            .querySelector(".job-advantage p")
            ?.textContent.trim()
            .split(/\s+/)
            .filter((s) => s),
          experience,
          education,
          info: document.querySelector(".job-detail")?.textContent,
          city,
          location: document
            .querySelector(".work_addr span")
            ?.textContent?.replace("- ", ""),
          company: document.querySelector(".job_company_content .fl-cn")
            ?.textContent,
          company_tags: Array.from(
            document.querySelectorAll(".c_feature li h4")
          ).map((li) => li.textContent),
        };
      },
      "liepin.com": () => {
        const [city, experience, education] = Array.from(
          document.querySelectorAll(".job-properties span")
        )
          .map((span) => span.textContent)
          .filter((s) => s);
        return {
          url: location.href,
          source: "猎聘",
          name: document.querySelector(".name")?.textContent,
          salary: document.querySelector(".salary")?.textContent,
          tags: Array.from(document.querySelectorAll(".labels span")).map(
            (span) => span.textContent
          ),
          experience,
          education,
          info: document.querySelector("[data-selector='job-intro-content']")
            ?.textContent,
          city,
          // The label box titled "职位地址：" is the address; every other
          // label box is collected as a company tag below.
          location: Array.from(
            document.querySelectorAll(".company-other .label-box")
          )
            .filter(
              (div) => div.querySelector(".label")?.textContent === "职位地址："
            )
            .map((div) => div.querySelector(".text")?.textContent)[0],
          company: document.querySelector(".company-card .name")?.textContent,
          company_tags: Array.from(
            document.querySelectorAll(".company-other .label-box")
          )
            .filter(
              (div) => div.querySelector(".label")?.textContent !== "职位地址："
            )
            .map((div) => div.querySelector(".text")?.textContent),
        };
      },
      "zhaopin.com": () => {
        const [city, experience, education] = Array.from(
          document.querySelectorAll(".summary-plane__info li")
        ).map((li) => li.textContent);
        return {
          url: location.href,
          source: "智联招聘",
          name: document.querySelector(".summary-plane__title")?.textContent,
          salary: document.querySelector(".summary-plane__salary")?.textContent,
          tags: Array.from(
            document.querySelectorAll(".highlights__content span")
          ).map((span) => span.textContent),
          experience,
          education,
          info: document.querySelector(".describtion")?.textContent,
          city,
          location: document.querySelector(".job-address__content-text")
            ?.textContent,
          company: document.querySelector(".company a")?.textContent,
          company_tags: Array.from(
            document.querySelectorAll(".company__detail button")
          ).map((button) => button.textContent),
        };
      },
      "zhipin.com": () => {
        // Child nodes (not only elements) are read, then empty texts dropped.
        const [city, experience, education] = Array.from(
          document.querySelector(".job-primary .info-primary > p")
            ?.childNodes || []
        )
          .map((span) => span.textContent)
          .filter((s) => s);
        return {
          url: location.href,
          source: "BOSS直聘",
          name: document.querySelector(".job-primary .name h1")?.textContent,
          salary: document.querySelector(".job-primary .salary")?.textContent,
          tags: Array.from(
            document.querySelectorAll(
              ".job-primary > .tag-container-new > .job-tags span"
            )
          ).map((span) => span.textContent),
          experience,
          education,
          info: document.querySelector(".job-sec .text")?.textContent,
          city,
          location: document.querySelector(".location-address")?.textContent,
          company: document
            .querySelector(".company-info [ka='job-detail-company_custompage']")
            ?.textContent?.trim(),
          company_tags: Array.from(
            document.querySelectorAll(".sider-company > p:not(.title)")
          ).map((p) => p.textContent),
        };
      },
    };

    // Keep the last two hostname labels, e.g. "jobs.51job.com" -> "51job.com".
    // The dot is escaped so it only matches a literal label separator.
    const domain = /[^.]+\.[^.]+$/.exec(location.hostname)?.[0];
    const extract = FNS[domain];
    if (typeof extract !== "function") {
      // No extractor for this site: tell the user instead of throwing.
      alert("操作失败，暂不支持该网站：" + location.hostname);
      return;
    }

    const job = extract();
    // The extractor's `location` (street address) is not part of the upload.
    const {
      name,
      salary,
      experience,
      education,
      city,
      source,
      info,
      tags,
      company,
      company_tags,
      url,
    } = job;
    const { low, high } = parseSalary(salary);
    // Uploaded record, in order: job title, salary lower bound, salary upper
    // bound, raw salary text, experience, education, city, data source, job
    // description, perks, company name, company tags, page URL.
    const newJobData = {
      name,
      salary_low: low,
      salary_high: high,
      salary,
      experience,
      education,
      city,
      source,
      info,
      tags: tags?.join(","),
      company,
      company_tags: company_tags?.join(","),
      url,
    };

    console.log(newJobData);
    GM_xmlhttpRequest({
      url: "https://*/parse/classes/RecruitmentData",
      method: "POST",
      data: JSON.stringify(newJobData),
      headers: {
        "Content-Type": "application/json;charset=utf-8",
        "X-Parse-Application-Id": "*",
      },
      onload: function (xhr) {
        const data = xhr.responseText;
        // A response that mentions "objectId" is treated as a successful insert.
        if (data.includes("objectId")) {
          alert("操作成功！" + data);
        } else {
          alert("操作失败，请重试！" + data);
        }
      },
      onerror: function (err) {
        alert("操作失败，请重试！" + err);
      },
    });
  };

  // Receive messages posted by the page; this is how the page talks to the
  // sandboxed script.
  window.addEventListener("message", receiveMessage);

  /**
   * Handles "message" events and starts the data collection when the payload
   * is the "pluginGetData" command posted by the injected button.
   *
   * @param {MessageEvent} event - Message event; `event.data` may be any value
   *   (string, object, number, null), not only the command string.
   */
  function receiveMessage(event) {
    console.log("receiveMessage", event);
    const { data } = event;
    // Only strings can carry the command. Calling includes() on an object,
    // number or null payload from another sender would throw a TypeError.
    if (typeof data === "string" && data.includes("pluginGetData")) {
      pluginGetData();
    }
  }

  // Inject the trigger button into the page.
  const downloadLink = document.createElement("div");
  // GM_* functions run in the sandbox and cannot be combined with the page's
  // window, so the inline onclick only posts a message for the script to pick up.
  downloadLink.innerHTML = `<a onclick="window.postMessage('pluginGetData', '*')" style="display:block;width:300px;height:100px; line-height: 100px; position:fixed; top:10px;right:10px;z-index:999999; background-color:#c4261d; box-shadow: 1px 2px 3px #000; color:#fff; font-size: 28px; text-align:center; cursor: pointer;">》》获取数据《《</a>`;

  // Alternative: let the click download a CSV file instead.
  // const blob = new Blob([newJobData], {type: 'text/csv,charset=UTF-8'});
  // const uri = URL.createObjectURL(blob);
  // downloadLink.src = uri
  // downloadLink.download = (name+".csv")||"temp.csv";

  // Place the wrapper before the first element of <body>; when <body> has no
  // element children the reference node is empty and the wrapper is appended.
  document.body.insertBefore(downloadLink, document.body.firstElementChild);

})();

油猴插件之指定（招聘）数据获取

https://cszy.top/20230226-油猴插件之指定（招聘）数据获取/

作者

csorz

发布于

2023年2月26日

许可协议

上一篇：油猴插件之表格数据获取

下一篇：FFMPEG实践