From 843a90a0cd2018c7f59b59f51b83c7789c67dd3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=BE=E6=B5=A6=20=E7=9F=A5=E4=B9=9F=20Matsuura=20Tomoy?=
 =?UTF-8?q?a?=
Date: Thu, 14 Dec 2023 17:04:31 +0900
Subject: [PATCH] added code example

---
 .../2023/mediaart-programming2/10/1_launch.js | 12 +++++++++++
 .../mediaart-programming2/10/2_async_intro.js | 11 ++++++++++
 .../mediaart-programming2/10/3_map_async.js   | 21 +++++++++++++++++++
 .../mediaart-programming2/10/4_auto_scroll.js | 13 ++++++++++++
 .../10/5_youtube_hopper.js                    | 20 ++++++++++++++++++
 .../2023/mediaart-programming2/10/_index.md   | 16 ++++++++++++++
 .../mediaart-programming2/10/autoscroll.js    | 21 +++++++++++++++++++
 .../mediaart-programming2/10/package.json     | 15 +++++++++++++
 .../docs/2023/mediaart-programming2/_index.md |  2 +-
 9 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 content/docs/2023/mediaart-programming2/10/1_launch.js
 create mode 100644 content/docs/2023/mediaart-programming2/10/2_async_intro.js
 create mode 100644 content/docs/2023/mediaart-programming2/10/3_map_async.js
 create mode 100644 content/docs/2023/mediaart-programming2/10/4_auto_scroll.js
 create mode 100644 content/docs/2023/mediaart-programming2/10/5_youtube_hopper.js
 create mode 100644 content/docs/2023/mediaart-programming2/10/autoscroll.js
 create mode 100644 content/docs/2023/mediaart-programming2/10/package.json

diff --git a/content/docs/2023/mediaart-programming2/10/1_launch.js b/content/docs/2023/mediaart-programming2/10/1_launch.js
new file mode 100644
index 0000000..e8143e8
--- /dev/null
+++ b/content/docs/2023/mediaart-programming2/10/1_launch.js
@@ -0,0 +1,12 @@
+const puppeteer = require('puppeteer');
+
+const main = async () => {
+    const browser = await puppeteer.launch({ headless: false });
+    const page = await browser.newPage();
+    await page.goto('https://www.youtube.com/');
+    await page.screenshot({ path: "./ss.png", fullPage: true });
+    await page.close();
+    await browser.close();
+};
+
+main();
diff --git a/content/docs/2023/mediaart-programming2/10/2_async_intro.js b/content/docs/2023/mediaart-programming2/10/2_async_intro.js
new file mode 100644
index 0000000..0d45dac
--- /dev/null
+++ b/content/docs/2023/mediaart-programming2/10/2_async_intro.js
@@ -0,0 +1,11 @@
+const puppeteer = require('puppeteer');
+
+const main = async () =>
+    puppeteer.launch({ headless: false })
+        .then(browser => browser.newPage()
+            .then(page => page.goto('https://www.youtube.com/')
+                .then(_response => page.screenshot({ path: "./ss.png" })
+                    .then(_ => page.close()
+                        .then(_ => browser.close())))));
+
+main();
diff --git a/content/docs/2023/mediaart-programming2/10/3_map_async.js b/content/docs/2023/mediaart-programming2/10/3_map_async.js
new file mode 100644
index 0000000..a64a962
--- /dev/null
+++ b/content/docs/2023/mediaart-programming2/10/3_map_async.js
@@ -0,0 +1,21 @@
+const puppeteer = require('puppeteer');
+
+const take_screen_shot = async (browser, url, index) => {
+    const page = await browser.newPage();
+    const path = `${index}.png`;
+    await page.goto(url);
+    await page.screenshot({ path });
+    await page.close();
+};
+
+const main = async () => {
+    const browser = await puppeteer.launch({ headless: false, args: ["--window-size=1920,1080"], timeout: 0 });
+    const urllist = ["https://www.youtube.com",
+        "https://www.google.com",
+        "https://www.geidai.ac.jp",
+        "https://yahoo.co.jp"];
+    await Promise.all(urllist.map((url, index) => take_screen_shot(browser, url, index)));
+    await browser.close();
+};
+
+main();
\ No newline at end of file
diff --git a/content/docs/2023/mediaart-programming2/10/4_auto_scroll.js b/content/docs/2023/mediaart-programming2/10/4_auto_scroll.js
new file mode 100644
index 0000000..cedf0d8
--- /dev/null
+++ b/content/docs/2023/mediaart-programming2/10/4_auto_scroll.js
@@ -0,0 +1,13 @@
+const puppeteer = require('puppeteer');
+const autoScroll = require("./autoscroll.js").autoScroll;
+
+const main = async () => {
+    const browser = await puppeteer.launch({ headless: false });
+    const page = await browser.newPage();
+    await page.goto('https://www.youtube.com/');
+    await autoScroll(page, 10);
+    await page.screenshot({ path: "ss_full.png", fullPage: true });
+    await page.close();
+    await browser.close();
+};
+main();
\ No newline at end of file
diff --git a/content/docs/2023/mediaart-programming2/10/5_youtube_hopper.js b/content/docs/2023/mediaart-programming2/10/5_youtube_hopper.js
new file mode 100644
index 0000000..b84bb0e
--- /dev/null
+++ b/content/docs/2023/mediaart-programming2/10/5_youtube_hopper.js
@@ -0,0 +1,20 @@
+const puppeteer = require('puppeteer');
+const autoScroll = require("./autoscroll.js").autoScroll;
+
+const main = async () => {
+    const browser = await puppeteer.launch({ headless: false });
+    const page = await browser.newPage();
+    await page.goto('https://www.youtube.com/');
+    for (let i = 0; i < 1000; i++) {
+        const urllist = await page.$$eval("a#thumbnail", elems => elems.map(elem => elem.href));
+        const url = urllist[3];
+        if (!url) break; // no 4th thumbnail link (or empty href): stop instead of calling goto(undefined)
+        console.log(url);
+        await page.goto(url);
+        await autoScroll(page, 10);
+        await page.screenshot({ path: `${i}.png`, fullPage: true });
+    }
+    await page.close();
+    await browser.close();
+};
+main();
\ No newline at end of file
diff --git a/content/docs/2023/mediaart-programming2/10/_index.md b/content/docs/2023/mediaart-programming2/10/_index.md
index 00c9371..7fbd5ff 100644
--- a/content/docs/2023/mediaart-programming2/10/_index.md
+++ b/content/docs/2023/mediaart-programming2/10/_index.md
@@ -12,3 +12,19 @@ params:
 ## スライド
 
 {{< embed_pdf >}}
+
+## puppeteerのコードサンプル
+
+`autoscroll.js`は`4_auto_scroll.js`と`5_youtube_hopper.js`でライブラリとして使用するだけなので、内容を理解しなくても大丈夫です。
+
+{{< preview_code href="1_launch.js" type= "js">}}
+
+{{< preview_code href="2_async_intro.js" type= "js">}}
+
+{{< preview_code href="3_map_async.js" type= "js">}}
+
+{{< preview_code href="4_auto_scroll.js" type= "js">}}
+
+{{< preview_code href="5_youtube_hopper.js" type= "js">}}
+
+{{< preview_code href="autoscroll.js" type= "js">}}
\ No newline at end of file
diff --git a/content/docs/2023/mediaart-programming2/10/autoscroll.js b/content/docs/2023/mediaart-programming2/10/autoscroll.js
new file mode 100644
index 0000000..f6b83d5
--- /dev/null
+++ b/content/docs/2023/mediaart-programming2/10/autoscroll.js
@@ -0,0 +1,21 @@
+/**
+ * Scrolls `page` down by 1000px every 100ms; resolves after `maxScrolls` steps (at least one step always runs).
+ */
+async function autoScroll(page, maxScrolls) {
+    await page.evaluate(async maxScrolls => {
+        await new Promise((resolve) => {
+            const distance = 1000;
+            let scrolls = 0;
+            const timer = setInterval(() => {
+                window.scrollBy(0, distance);
+                scrolls += 1;
+                if (scrolls >= maxScrolls) {
+                    clearInterval(timer);
+                    resolve();
+                }
+            }, 100);
+        });
+    }, maxScrolls);
+}
+
+exports.autoScroll = autoScroll;
\ No newline at end of file
diff --git a/content/docs/2023/mediaart-programming2/10/package.json b/content/docs/2023/mediaart-programming2/10/package.json
new file mode 100644
index 0000000..db610a4
--- /dev/null
+++ b/content/docs/2023/mediaart-programming2/10/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "20231201-scraping_test",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "author": "",
+  "license": "ISC",
+  "dependencies": {
+    "jsdom": "^23.0.1",
+    "puppeteer": "^21.6.0"
+  }
+}
\ No newline at end of file
diff --git a/content/docs/2023/mediaart-programming2/_index.md b/content/docs/2023/mediaart-programming2/_index.md
index 9d6bbc3..95b7801 100644
--- a/content/docs/2023/mediaart-programming2/_index.md
+++ b/content/docs/2023/mediaart-programming2/_index.md
@@ -32,7 +32,7 @@ bookCollapseSection: true
 (大学公式のシラバスから一部更新されています)。
 
 1. [(10/6) オリエンテーション](./1)
-2. [(10/13) ターミナルを扱ってみる/10 Print](/.2)
+2. [(10/13) ターミナルを扱ってみる/10 Print](./2)
 3. [(10/20) グリッチ入門:画像と音の相互変換](./3)
 4. [(10/27) テキストから作る音:Bytebeat](./4)
 5. [(10/31 火曜振替) 配列としてのテキストデータ](./5)