Source: get_daily.js

/**
 * @author IITII
 * @date 2020/9/19 12:34
 */
'use strict';
const config = require('../config'),
  fetch = require('node-fetch'),
  cheerio = require('cheerio'),
  _ = require('lodash'),
  fs = require('fs'),
  path = require('path'),
  {logger} = require("./logger"),
  webdriver = require('selenium-webdriver'),
  By = webdriver.By,
  BROWSER = 'chrome',
  chrome = require(`selenium-webdriver/${BROWSER}`),
  until = webdriver.until,
  utils = require('./utils'),
  HttpsProxyAgent = require('https-proxy-agent'),
  PIXIV_USERNAME = config.pixiv.username,
  redis_api = require('./redis_api.js'),
  PIXIV_PASSWORD = config.pixiv.password;

// User-Agent string, initialised from the configuration. It is declared with `let` because
// getDaily() overwrites it with the `navigator.userAgent` value reported by the live browser
// session (when that value is truthy).
let User_Agent = config.user_agent;

/**
 * Download a page and hand its body to cheerio.
 * @param url address of the page to download
 * @param proxy proxy address passed to HttpsProxyAgent; when nil the request is sent without an agent
 * @return {Promise<*>} whatever `cheerio.load` returns for the response text
 */
async function getDom(url, proxy) {
  let pending;
  if (utils.isNil(proxy)) {
    // No proxy configured: plain request without any options.
    pending = fetch(url);
  } else {
    const agent = new HttpsProxyAgent(proxy);
    pending = fetch(url, {agent});
  }
  const response = await pending;
  const html = await response.text();
  return cheerio.load(html);
}

/**
 * Log in to pixiv in the given browser session.
 *
 * Opens the pixiv login page, waits for the `LoginComponent` element, then runs the script
 * stored in `../dom/login.js` after substituting the credentials into it.
 * @param driver selenium driver
 * @param username pixiv username, replaces the first `username` token of the script
 * @param password pixiv password, replaces the first `password` token of the script
 * @return {Promise<void>} settles once the script has run and the trailing pause has elapsed
 */
async function login(driver, username, password) {
  await driver.get('https://accounts.pixiv.net/login');
  // Random pause between steps; the bounds are forwarded to utils.getRandomSec as-is.
  await utils.sleep(utils.getRandomSec(3, 10));
  await driver.wait(until.elementsLocated(By.id('LoginComponent')), 60000);
  const template = fs.readFileSync(path.resolve(__dirname, '../dom/login.js'), {
    encoding: 'utf-8'
  });
  // Substitute both placeholders in ONE pass over the untouched template:
  //  - chaining two replace() calls would let the second one hit a "password" substring
  //    that was just injected as part of the username;
  //  - a replacer function inserts the value verbatim, whereas a replacement string would
  //    interpret sequences such as `$&` or `$$` inside the credentials.
  // Only the first occurrence of each token is replaced; later ones are left alone.
  // NOTE(review): the values are spliced into script source without escaping. Whether quotes
  // or backslashes in a credential can break the script depends on how ../dom/login.js
  // quotes its placeholders - confirm against that file.
  const credentials = new Map([
    ['username', username],
    ['password', password]
  ]);
  const js = template.replace(/username|password/g, placeholder => {
    if (!credentials.has(placeholder)) {
      return placeholder;
    }
    const value = credentials.get(placeholder);
    credentials.delete(placeholder);
    return value;
  });
  await driver.executeScript(`${js}`);
  await utils.sleep(utils.getRandomSec(3, 10));
}

/**
 * Get Daily Rank Url
 *
 * Loads pixiv's daily illustration ranking page through getDom() and collects the link
 * target of every `.work` element, resolved against the ranking page URL.
 * @param limit maximum array size; must be greater than 0 and at most 50, default: 50
 * @return {Promise<Array>} absolute URLs in page order, at most `limit` entries;
 *   rejects with a RangeError when `limit` is out of range (or not a number)
 */
async function getDailyRankUrl(limit = 50) {
  // Written as a negated range check so that NaN is rejected as well.
  if (!(limit > 0 && limit <= 50)) {
    throw new RangeError('Limit should be greater than 0 and no more than 50');
  }
  const DAILY_RANKING_URL = 'https://www.pixiv.net/ranking.php?mode=daily&content=illust';
  const proxy = config.pixiv.proxy || config.proxy || process.env.HTTP_PROXY || null;
  const $ = await getDom(DAILY_RANKING_URL, proxy);
  const illustrationArray = [];
  $('.work').each((index, item) => {
    if (illustrationArray.length >= limit) {
      // Returning false makes cheerio stop iterating.
      return false;
    }
    const href = item.attribs && item.attribs.href;
    if (href) {
      // Resolve against the page URL so root-relative, relative and absolute hrefs all
      // end up as valid absolute URLs; elements without an href are skipped.
      illustrationArray.push(new URL(href, DAILY_RANKING_URL).href);
    }
  });
  return illustrationArray;
}

/**
 * Get img origin url
 *
 * Opens the artwork page, waits for the `div[role="presentation"] > a` link, clicks it and
 * then evaluates `js` inside the page.
 * @param driver selenium driver
 * @param imgUrl {URL} artwork page to open
 * @param js {String} script expression; it is executed as `return <js>`
 * @return {Promise<Array>} value produced by the script; the caller treats it as an array
 *   of image URLs whose length may be greater than 1. Rejects when any WebDriver step fails.
 */
async function getRealImgUrl(driver, imgUrl, js) {
  // A plain async function (no `new Promise` wrapper around an async executor) so that a
  // failing WebDriver call rejects the returned promise instead of leaving it pending forever.
  await driver.get(imgUrl);
  const viewerLink = By.css('div[role="presentation"] > a');
  await driver.wait(until.elementLocated(viewerLink), 600000);
  await utils.sleep(utils.getRandomSec(3, 10));
  await driver.findElement(viewerLink).click();
  await utils.sleep(utils.getRandomSec(3, 10));
  return driver.executeScript(`return ${js}`);
}

/**
 * Collect the original image URLs of today's pixiv daily ranking.
 *
 * Steps: fetch the ranking URLs, drop duplicates (and, when redis is enabled, whatever
 * `redis_api.unique` filters out), log in with a browser session, then visit every
 * remaining artwork page and gather the URLs reported by `../dom/img.js`.
 * @return {Promise<Array>} entries of the form `{url, origin, savePath}`, unique by `url`:
 *   `url` is the image URL, `origin` the artwork page it came from and `savePath` the target
 *   file below `config.save.currentImgSaveDir`. Resolves with `[]` when nothing is left to
 *   fetch. Rejects with a string when the pixiv credentials are missing, otherwise with the
 *   error of the step that failed.
 */
async function getDaily() {
  if (utils.isNil(PIXIV_USERNAME) || utils.isNil(PIXIV_PASSWORD)) {
    // Deliberately a plain string: callers may already rely on this rejection value.
    throw 'Empty PIXIV_USERNAME or PIXIV_PASSWORD!!!';
  }
  // Created lazily so that no browser is started when there is nothing to download.
  let driver = null;
  try {
    // Remove duplicate
    let rankUrls = _.uniq(await getDailyRankUrl());
    // We need remove duplicate before download new images.
    // Maybe this url had already downloaded at past.
    if (config.redis.enable) {
      rankUrls = await redis_api.unique(rankUrls);
    }
    if (rankUrls.length === 0) {
      return [];
    }
    driver = new webdriver.Builder()
      .forBrowser(BROWSER)
      .setChromeOptions(new chrome.Options().addArguments(config.webdriver.args))
      .build();
    await login(driver, PIXIV_USERNAME, PIXIV_PASSWORD);
    // Update user_agent with live chrome user_agent
    User_Agent = (await driver.executeScript(`return navigator.userAgent`)) || User_Agent;
    const js = fs.readFileSync(path.resolve(__dirname, '../dom/img.js'), {
      encoding: 'utf-8'
    });
    const data = [];
    // Pages are visited one after another on purpose: they all share the single driver.
    for (const rankUrl of rankUrls) {
      const imgUrls = await getRealImgUrl(driver, rankUrl, js);
      for (const imgUrl of imgUrls) {
        logger.info(`Got ${imgUrl} from ${rankUrl}`);
        data.push({
          url: imgUrl,
          origin: rankUrl,
          // Duplicate with a very low probability
          savePath: config.save.currentImgSaveDir + path.sep + path.basename(new URL(imgUrl).pathname)
        });
      }
    }
    if (config.redis.enable) {
      // Due to pixiv's anti-spider, keys are added to redis only after every page succeeded.
      await redis_api.setRedis(rankUrls);
      // NOTE(review): redis is only quit on this success path, exactly as before; whether the
      // empty-list and failure paths should quit it too depends on redis_api - confirm.
      redis_api.quitRedis();
    }
    return _.uniqBy(data, 'url');
  } finally {
    // Always release the browser, also when a step above failed; otherwise the Chrome
    // process would be leaked. The quit is not awaited, just to reduce time.
    if (driver !== null) {
      driver.quit()
        .then(() => {
          logger.info(`driver quit successfully!!!`);
        })
        .catch(e => {
          logger.error(`driver quit failed!!!`);
          logger.error(e);
        });
    }
  }
}

// Public API of this module: only the daily-ranking entry point is exported.
module.exports = {getDaily: getDaily};