added code example

This commit is contained in:
松浦 知也 Matsuura Tomoya 2023-12-14 17:04:31 +09:00
parent f290b18722
commit 843a90a0cd
9 changed files with 126 additions and 1 deletions

View File

@ -0,0 +1,12 @@
const puppeteer = require('puppeteer');
/**
 * Launches a visible (non-headless) browser, opens the YouTube top page and
 * saves a full-page screenshot to ./ss.png.
 *
 * @returns {Promise<void>} Settles after the browser has been closed; rejects
 *   with the original error if any step fails.
 */
const main = async () => {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.youtube.com/');
    await page.screenshot({ path: "./ss.png", fullPage: true });
    await page.close();
  } finally {
    // Runs on both the success and the failure path, so a failed navigation
    // or screenshot no longer skips closing the browser.
    await browser.close();
  }
};
// Start the script; report a failure explicitly instead of leaving the
// returned promise unhandled, and signal it through the exit status.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@ -0,0 +1,11 @@
const puppeteer = require('puppeteer');
/**
 * Promise-chain (`.then`) version of the launch-and-screenshot script: opens
 * the YouTube top page in a visible browser and saves a screenshot to ./ss.png.
 * Deliberately written without `await` to show the chained form.
 *
 * @returns {Promise<void>} Settles after the browser has been closed; rejects
 *   with the original error if any step fails.
 */
const main = async () =>
  puppeteer.launch({ headless: false })
    .then((browser) => browser.newPage()
      .then((page) => page.goto('https://www.youtube.com/')
        .then((_response) => page.screenshot({ path: "./ss.png" }))
        .then(() => page.close()))
      // `.finally` runs whether the chain above fulfilled or rejected, so the
      // browser is closed on the failure path as well.
      .finally(() => browser.close()));
// Start the script; report a failure explicitly instead of leaving the
// returned promise unhandled, and signal it through the exit status.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@ -0,0 +1,21 @@
const puppeteer = require('puppeteer');
/**
 * Opens `url` in a new page of `browser` and saves a screenshot named
 * `<index>.png` in the current working directory.
 *
 * @param {object} browser - Object exposing `newPage()`; presumably a Puppeteer Browser.
 * @param {string} url - Address to navigate to.
 * @param {number|string} index - Used as the screenshot's base file name.
 * @returns {Promise<void>} Settles after the page has been closed; rejects
 *   with the original error if navigation or the screenshot fails.
 */
const take_screen_shot = async (browser, url, index) => {
  const page = await browser.newPage();
  try {
    await page.goto(url);
    await page.screenshot({ path: `${index}.png` });
  } finally {
    // The browser is shared between calls, so close this page on failure
    // too rather than leaving it open.
    await page.close();
  }
};
/**
 * Launches one visible browser and captures a screenshot of every URL in the
 * list concurrently, one page per URL, via `take_screen_shot`.
 *
 * @returns {Promise<void>} Settles after the browser has been closed; rejects
 *   with the first capture error if any capture fails.
 */
const main = async () => {
  const browser = await puppeteer.launch({
    headless: false,
    args: ["--window-size=1920,1080"],
    // Per the Puppeteer launch options, 0 disables the browser start-up timeout.
    timeout: 0,
  });
  const urllist = [
    "https://www.youtube.com",
    "https://www.google.com",
    "https://www.geidai.ac.jp",
    "https://yahoo.co.jp",
  ];
  try {
    // The array position doubles as the output file name (0.png, 1.png, ...).
    await Promise.all(urllist.map((url, index) => take_screen_shot(browser, url, index)));
  } finally {
    // Promise.all rejects as soon as one capture fails; close the browser
    // in that case as well.
    await browser.close();
  }
};
// Start the script; report a failure explicitly instead of leaving the
// returned promise unhandled, and signal it through the exit status.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@ -0,0 +1,13 @@
const puppeteer = require('puppeteer');
const autoScroll = require("./autoscroll.js").autoScroll;
/**
 * Opens the YouTube top page in a visible browser, scrolls it down with
 * `autoScroll(page, 10)`, then saves a full-page screenshot to ss_full.png.
 *
 * @returns {Promise<void>} Settles after the browser has been closed; rejects
 *   with the original error if any step fails.
 */
const main = async () => {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.youtube.com/');
    // Scroll before capturing; the screenshot is taken only after this settles.
    await autoScroll(page, 10);
    await page.screenshot({ path: "ss_full.png", fullPage: true });
    await page.close();
  } finally {
    // Close the browser on the failure path as well.
    await browser.close();
  }
};
// Start the script; report a failure explicitly instead of leaving the
// returned promise unhandled, and signal it through the exit status.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@ -0,0 +1,19 @@
const puppeteer = require('puppeteer');
const autoScroll = require("./autoscroll.js").autoScroll;
/**
 * Starting from the YouTube top page, repeatedly follows the fourth
 * `a#thumbnail` link found on the current page, scrolls the destination with
 * `autoScroll(page, 10)` and saves a full-page screenshot as `<hop>.png`.
 * Stops after 1000 hops, or earlier when the current page has no usable
 * fourth thumbnail link.
 *
 * @returns {Promise<void>} Settles after the browser has been closed; rejects
 *   with the original error if any step fails.
 */
const main = async () => {
  const maxHops = 1000;
  // Zero-based position of the thumbnail link to follow on each page.
  const linkIndex = 3;
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.youtube.com/');
    for (let i = 0; i < maxHops; i++) {
      const urllist = await page.$$eval("a#thumbnail", (elems) => elems.map((elem) => elem.href));
      const url = urllist[linkIndex];
      if (!url) {
        // Fewer than four matching links (or an empty href): there is nothing
        // to navigate to, so stop instead of calling goto() with no URL.
        console.log(`no thumbnail link at index ${linkIndex}; stopping after ${i} hops`);
        break;
      }
      console.log(url);
      await page.goto(url);
      await autoScroll(page, 10);
      await page.screenshot({ path: `${i}.png`, fullPage: true });
    }
    await page.close();
  } finally {
    // Close the browser on the failure path as well.
    await browser.close();
  }
};
// Start the script; report a failure explicitly instead of leaving the
// returned promise unhandled, and signal it through the exit status.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@ -12,3 +12,19 @@ params:
## スライド
{{< embed_pdf >}}
## puppeteerのコードサンプル
`autoscroll.js`は`4_auto_scroll.js`と`5_youtube_hopper.js`でライブラリとして使用するだけなので、内容を理解しなくても大丈夫です。
{{< preview_code href="1_launch.js" type= "js">}}
{{< preview_code href="2_async_intro.js" type= "js">}}
{{< preview_code href="3_map_async.js" type= "js">}}
{{< preview_code href="4_auto_scroll.js" type= "js">}}
{{< preview_code href="5_youtube_hopper.js" type= "js">}}
{{< preview_code href="autoscroll.js" type= "js">}}

View File

@ -0,0 +1,18 @@
/**
 * Scrolls the page down in fixed steps: every 100 ms it calls
 * `window.scrollBy(0, 1000)`, and it stops after `maxScrolls` steps.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)`; presumably a Puppeteer Page.
 * @param {number} maxScrolls - Number of scroll steps to perform. Zero or a
 *   negative value performs no scrolling.
 * @returns {Promise<void>} Settles once the last scroll step has run.
 * @throws {TypeError} If `maxScrolls` cannot be converted to a number;
 *   previously such a value made the promise wait forever.
 */
async function autoScroll(page, maxScrolls) {
  const limit = Number(maxScrolls);
  if (Number.isNaN(limit)) {
    // `scrolls >= NaN` is never true, so the timer below would never stop.
    throw new TypeError(`maxScrolls must be a number, got ${String(maxScrolls)}`);
  }
  if (limit <= 0) {
    // The timer always scrolls before checking the counter, so skip it
    // entirely when no scrolling was requested.
    return;
  }
  // The callback only uses its own argument and locals (no outer variables),
  // so it stays self-contained when handed to page.evaluate.
  await page.evaluate(async (scrollLimit) => {
    const distance = 1000;
    const intervalMs = 100;
    await new Promise((resolve) => {
      let scrolls = 0;
      const timer = setInterval(() => {
        window.scrollBy(0, distance);
        scrolls += 1;
        if (scrolls >= scrollLimit) {
          clearInterval(timer);
          resolve();
        }
      }, intervalMs);
    });
  }, limit);
}
exports.autoScroll = autoScroll;

View File

@ -0,0 +1,15 @@
{
"name": "20231201-scraping_test",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"jsdom": "^23.0.1",
"puppeteer": "^21.6.0"
}
}

View File

@ -32,7 +32,7 @@ bookCollapseSection: true
(大学公式のシラバスから一部更新されています)。
1. [10/6 オリエンテーション](./1)
2. [10/13 ターミナルを扱ってみる/10 Print](/.2)
2. [10/13 ターミナルを扱ってみる/10 Print](./2)
3. [10/20 グリッチ入門:画像と音の相互変換](./3)
4. [10/27 テキストから作る音Bytebeat](./4)
5. [10/31 火曜振替 配列としてのテキストデータ](./5)