272 lines
25 KiB
Markdown
272 lines
25 KiB
Markdown
|
---
|
|||
|
title: スタバの店舗情報をDBに登録するスクリプト
|
|||
|
author: kazu634
|
|||
|
date: 2009-09-17
|
|||
|
url: /2009/09/17/_1329/
|
|||
|
wordtwit_post_info:
|
|||
|
- 'O:8:"stdClass":13:{s:6:"manual";b:0;s:11:"tweet_times";i:1;s:5:"delay";i:0;s:7:"enabled";i:1;s:10:"separation";s:2:"60";s:7:"version";s:3:"3.7";s:14:"tweet_template";b:0;s:6:"status";i:2;s:6:"result";a:0:{}s:13:"tweet_counter";i:2;s:13:"tweet_log_ids";a:1:{i:0;i:4783;}s:9:"hash_tags";a:0:{}s:8:"accounts";a:1:{i:0;s:7:"kazu634";}}'
|
|||
|
categories:
|
|||
|
- Perl
|
|||
|
- starbucks
|
|||
|
|
|||
|
---
|
|||
|
<div class="section">
|
|||
|
<p>
|
|||
|
ようやく完成しました。Web::Scraperって、奥が深いです。やろうと考えてから、実際にできあがるまで、かなり時間がたっているような気がする…が、あまり気にしないようにします。
|
|||
|
</p>
|
|||
|
|
|||
|
<h4>
|
|||
|
ソース
|
|||
|
</h4>
|
|||
|
|
|||
|
<pre class="syntax-highlight">
|
|||
|
<span class="synPreProc">#!/usr/bin/perl</span>
|
|||
|
<span class="synStatement">use strict</span>;
|
|||
|
<span class="synStatement">use </span>Web::Scraper;
|
|||
|
<span class="synStatement">use </span>URI;
|
|||
|
<span class="synStatement">use </span>URI::Escape;
|
|||
|
<span class="synStatement">use utf8</span>;
|
|||
|
<span class="synStatement">use </span>YAML;
|
|||
|
<span class="synStatement">use </span>Perl6::Say;
|
|||
|
<span class="synStatement">use </span>Encode;
|
|||
|
<span class="synStatement">use </span>DBI;
|
|||
|
<span class="synComment"># =========================</span>
|
|||
|
<span class="synComment"># === 各県へのURLを取得 ===</span>
|
|||
|
<span class="synComment"># =========================</span>
|
|||
|
<span class="synComment"># starbucksのURLを指定</span>
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$uri</span> = URI-><span class="synStatement">new</span>(<span class="synConstant">"http://www.starbucks.co.jp/search/index.html"</span>);
|
|||
|
<span class="synComment"># スクレイピングの設定を行う</span>
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$scraper</span> = scraper {
|
|||
|
process <span class="synConstant">'//area[@href=~/.+SearchPerfecture/]'</span>, <span class="synConstant">'prefs[]'</span> => [
|
|||
|
<span class="synConstant">'@href'</span>,
|
|||
|
<span class="synIdentifier"> </span><span class="synStatement">sub</span><span class="synIdentifier"> </span>{
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$url</span> = <span class="synIdentifier">$_</span>->as_string;
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">@url_split</span> = <span class="synStatement">split</span>( /=/, <span class="synIdentifier">$url</span> );
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$utf8_encode</span> = <span class="synStatement">pop</span>(<span class="synIdentifier">@url_split</span>);
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$utf8</span> = uri_unescape(<span class="synIdentifier">$utf8_encode</span>);
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$temp</span> = decode( <span class="synConstant">'utf8'</span>, <span class="synIdentifier">$utf8</span> );
|
|||
|
<span class="synIdentifier">$temp</span> = encode( <span class="synConstant">'shiftjis'</span>, <span class="synIdentifier">$temp</span> );
|
|||
|
<span class="synIdentifier">$temp</span> = uri_escape(<span class="synIdentifier">$temp</span>);
|
|||
|
<span class="synStatement">push</span>( <span class="synIdentifier">@url_split</span>, <span class="synIdentifier">$temp</span> );
|
|||
|
<span class="synStatement">return</span> <span class="synStatement">join</span>( <span class="synConstant">'='</span>, <span class="synIdentifier">@url_split</span> );
|
|||
|
}
|
|||
|
];
|
|||
|
process <span class="synConstant">'//td[@class="SelectFromPlace"]//a'</span>, <span class="synConstant">'cities[]'</span> => <span class="synConstant">'@href'</span>;
|
|||
|
};
|
|||
|
<span class="synComment"># スクレイピングの実行</span>
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$result</span> = <span class="synIdentifier">$scraper</span>->scrape(<span class="synIdentifier">$uri</span>);
|
|||
|
<span class="synStatement">foreach</span> <span class="synStatement">my</span> <span class="synIdentifier">$page</span> ( @{ <span class="synIdentifier">$result</span>->{<span class="synConstant">'cities'</span>} } ) {
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$scraper</span> = scraper {
|
|||
|
process <span class="synConstant">'//map[@name=~/[^_]+_Map/]'</span>, <span class="synConstant">'city[]'</span> => scraper {
|
|||
|
process <span class="synConstant">'//area[@href=~/.+result_city3.php/]'</span>, <span class="synConstant">'shops[]'</span> => [
|
|||
|
<span class="synConstant">'@href'</span>,
|
|||
|
<span class="synIdentifier"> </span><span class="synStatement">sub</span><span class="synIdentifier"> </span>{
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$url</span> = <span class="synIdentifier">$_</span>->as_string;
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">@work</span>;
|
|||
|
<span class="synStatement">if</span> ( <span class="synIdentifier">$url</span> =~
|
|||
|
<span class="synStatement">/</span><span class="synConstant">SearchPerfecture=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)\&</span><span class="synConstant">SearchCity=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)\&</span><span class="synConstant">SearchCity2=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)</span><span class="synConstant">&</span><span class="synStatement">/</span>
|
|||
|
)
|
|||
|
{
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$pref</span> = uri_escape(
|
|||
|
encode(
|
|||
|
<span class="synConstant">'shiftjis'</span>, decode( <span class="synConstant">'utf8'</span>, uri_unescape(<span class="synIdentifier">$1</span>) )
|
|||
|
)
|
|||
|
);
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$city1</span> = uri_escape(
|
|||
|
encode(
|
|||
|
<span class="synConstant">'shiftjis'</span>, decode( <span class="synConstant">'utf8'</span>, uri_unescape(<span class="synIdentifier">$2</span>) )
|
|||
|
)
|
|||
|
);
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$city2</span> = uri_escape(
|
|||
|
encode(
|
|||
|
<span class="synConstant">'shiftjis'</span>, decode( <span class="synConstant">'utf8'</span>, uri_unescape(<span class="synIdentifier">$3</span>) )
|
|||
|
)
|
|||
|
);
|
|||
|
<span class="synIdentifier">$url</span> =~
|
|||
|
<span class="synStatement">s/</span><span class="synConstant">SearchPerfecture=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)\&</span><span class="synConstant">SearchCity=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)\&</span><span class="synConstant">SearchCity2=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)</span><span class="synConstant">&</span><span class="synStatement">/</span><span class="synConstant">SearchPerfecture=</span><span class="synIdentifier">$pref</span><span class="synConstant">&SearchCity=</span><span class="synIdentifier">$city1</span><span class="synConstant">&SearchCity2=</span><span class="synIdentifier">$city2</span><span class="synConstant">&</span><span class="synStatement">/</span>;
|
|||
|
<span class="synStatement">return</span> <span class="synIdentifier">$url</span>;
|
|||
|
}
|
|||
|
}
|
|||
|
];
|
|||
|
}
|
|||
|
};
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$result</span> = <span class="synIdentifier">$scraper</span>->scrape(<span class="synIdentifier">$page</span>);
|
|||
|
<span class="synStatement">foreach</span> <span class="synStatement">my</span> <span class="synIdentifier">$city</span> ( @{ <span class="synIdentifier">$result</span>->{<span class="synConstant">'city'</span>} } ) {
|
|||
|
<span class="synStatement">foreach</span> <span class="synStatement">my</span> <span class="synIdentifier">$page</span> ( @{ <span class="synIdentifier">$city</span>->{<span class="synConstant">'shops'</span>} } ) {
|
|||
|
<span class="synComment"># その県の店舗数を取得</span>
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$scraper</span> = scraper {
|
|||
|
process
|
|||
|
<span class="synConstant">'id("Body")/div[@class="Code"]/table[@class="H3Table01"]/tbody[1]/tr[2]/td[1]/strong[2]'</span>,
|
|||
|
<span class="synConstant">'number'</span> => <span class="synConstant">'TEXT'</span>;
|
|||
|
};
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$result</span> = <span class="synIdentifier">$scraper</span>->scrape( URI-><span class="synStatement">new</span>(<span class="synIdentifier">$page</span>) );
|
|||
|
<span class="synComment"># 店舗数に応じて、対応を変える</span>
|
|||
|
<span class="synStatement">if</span> ( <span class="synConstant">0</span> == <span class="synIdentifier">$result</span>->{<span class="synConstant">'number'</span>} ) {
|
|||
|
<span class="synComment"># 店舗数が0なら、何もしない</span>
|
|||
|
<span class="synStatement">next</span>;
|
|||
|
}
|
|||
|
<span class="synComment"># 店舗数が10店舗以下の場合</span>
|
|||
|
<span class="synComment"># そのページにしか店舗情報が存在しないので、そのページから情報を取得</span>
|
|||
|
<span class="synStatement">elsif</span> ( <span class="synIdentifier">$result</span>->{<span class="synConstant">'number'</span>} < <span class="synConstant">10</span> ) {
|
|||
|
get_info(<span class="synIdentifier">$page</span>);
|
|||
|
<span class="synStatement">sleep</span>(<span class="synConstant">3</span>);
|
|||
|
}
|
|||
|
<span class="synComment"># 店舗数が10店舗より多い場合、</span>
|
|||
|
<span class="synComment"># そのページ以外にも店舗情報が存在するので、</span>
|
|||
|
<span class="synComment"># まずは店舗情報へのリンクをすべて取得する。</span>
|
|||
|
<span class="synStatement">else</span> {
|
|||
|
<span class="synIdentifier">$scraper</span> = scraper {
|
|||
|
process
|
|||
|
<span class="synConstant">'id("Body")/div[@class="Code"]/table[@class="ResultNavi"]/tbody[1]/tr[1]/td[2]/a'</span>,
|
|||
|
<span class="synConstant">'links[]'</span> => [
|
|||
|
<span class="synConstant">'@href'</span>,
|
|||
|
<span class="synIdentifier"> </span><span class="synStatement">sub</span><span class="synIdentifier"> </span>{
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$url</span> = <span class="synIdentifier">$_</span>->as_string;
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">@work</span>;
|
|||
|
<span class="synComment"># http://www.starbucks.co.jp/search/result_city.php?SearchPerfecture=%93%8C%8B%9E%93s&SearchCity=%8D%60%8B%E6&storelist=11</span>
|
|||
|
<span class="synStatement">if</span> ( <span class="synIdentifier">$url</span> =~
|
|||
|
<span class="synStatement">/</span><span class="synConstant">SearchPerfecture=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)\&</span><span class="synConstant">SearchCity=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)\&</span><span class="synStatement">/</span>
|
|||
|
)
|
|||
|
{
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$pref</span> = uri_escape(
|
|||
|
encode(
|
|||
|
<span class="synConstant">'shiftjis'</span>,
|
|||
|
decode( <span class="synConstant">'utf8'</span>, uri_unescape(<span class="synIdentifier">$1</span>) )
|
|||
|
)
|
|||
|
);
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$city</span> = uri_escape(
|
|||
|
encode(
|
|||
|
<span class="synConstant">'shiftjis'</span>,
|
|||
|
decode( <span class="synConstant">'utf8'</span>, uri_unescape(<span class="synIdentifier">$2</span>) )
|
|||
|
)
|
|||
|
);
|
|||
|
<span class="synIdentifier">$url</span> =~
|
|||
|
<span class="synStatement">s/</span><span class="synConstant">SearchPerfecture=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)\&</span><span class="synConstant">SearchCity=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)\&</span><span class="synStatement">/</span><span class="synConstant">SearchPerfecture=</span><span class="synIdentifier">$pref</span><span class="synConstant">&SearchCity=</span><span class="synIdentifier">$city</span><span class="synSpecial">\&</span><span class="synStatement">/</span>;
|
|||
|
<span class="synStatement">return</span> <span class="synIdentifier">$url</span>;
|
|||
|
}
|
|||
|
}
|
|||
|
];
|
|||
|
};
|
|||
|
<span class="synIdentifier">$result</span> = <span class="synIdentifier">$scraper</span>->scrape( URI-><span class="synStatement">new</span>(<span class="synIdentifier">$page</span>) );
|
|||
|
<span class="synStatement">foreach</span> <span class="synStatement">my</span> <span class="synIdentifier">$link</span> ( @{ <span class="synIdentifier">$result</span>->{links} } ) {
|
|||
|
get_info(<span class="synIdentifier">$link</span>);
|
|||
|
<span class="synStatement">sleep</span>(<span class="synConstant">3</span>);
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
<span class="synComment"># 各県のリンクをたどって、店舗情報を取得する</span>
|
|||
|
<span class="synStatement">foreach</span> <span class="synStatement">my</span> <span class="synIdentifier">$page</span> ( @{ <span class="synIdentifier">$result</span>->{prefs} } ) {
|
|||
|
<span class="synComment"># その県の店舗数を取得</span>
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$scraper</span> = scraper {
|
|||
|
process
|
|||
|
<span class="synConstant">'id("Body")/div[@class="Code"]/table[@class="H3Table01"]/tbody[1]/tr[2]/td[1]/strong[2]'</span>,
|
|||
|
<span class="synConstant">'number'</span> => <span class="synConstant">'TEXT'</span>;
|
|||
|
};
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$result</span> = <span class="synIdentifier">$scraper</span>->scrape( URI-><span class="synStatement">new</span>(<span class="synIdentifier">$page</span>) );
|
|||
|
<span class="synComment"># 店舗数に応じて、対応を変える</span>
|
|||
|
<span class="synStatement">if</span> ( <span class="synConstant">0</span> == <span class="synIdentifier">$result</span>->{<span class="synConstant">'number'</span>} ) {
|
|||
|
<span class="synComment"># 店舗数が0なら、何もしない</span>
|
|||
|
<span class="synStatement">next</span>;
|
|||
|
}
|
|||
|
<span class="synComment"># 店舗数が10店舗以下の場合</span>
|
|||
|
<span class="synComment"># そのページにしか店舗情報が存在しないので、そのページから情報を取得</span>
|
|||
|
<span class="synStatement">elsif</span> ( <span class="synIdentifier">$result</span>->{<span class="synConstant">'number'</span>} < <span class="synConstant">10</span> ) {
|
|||
|
get_info(<span class="synIdentifier">$page</span>);
|
|||
|
<span class="synStatement">sleep</span>(<span class="synConstant">3</span>);
|
|||
|
}
|
|||
|
<span class="synComment"># 店舗数が10店舗より多い場合、</span>
|
|||
|
<span class="synComment"># そのページ以外にも店舗情報が存在するので、</span>
|
|||
|
<span class="synComment"># まずは店舗情報へのリンクをすべて取得する。</span>
|
|||
|
<span class="synStatement">else</span> {
|
|||
|
<span class="synIdentifier">$scraper</span> = scraper {
|
|||
|
process
|
|||
|
<span class="synConstant">'id("Body")/div[@class="Code"]/table[@class="ResultNavi"]/tbody[1]/tr[1]/td[2]/a'</span>,
|
|||
|
<span class="synConstant">'links[]'</span> => [
|
|||
|
<span class="synConstant">'@href'</span>,
|
|||
|
<span class="synIdentifier"> </span><span class="synStatement">sub</span><span class="synIdentifier"> </span>{
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$url</span> = <span class="synIdentifier">$_</span>->as_string;
|
|||
|
<span class="synStatement">if</span> ( <span class="synIdentifier">$url</span> =~<span class="synStatement"> /</span><span class="synConstant">SearchPerfecture=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)</span><span class="synStatement">/</span> ) {
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$utf8</span> = uri_unescape(<span class="synIdentifier">$1</span>);
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$temp</span> = decode( <span class="synConstant">'utf8'</span>, <span class="synIdentifier">$utf8</span> );
|
|||
|
<span class="synIdentifier">$temp</span> = encode( <span class="synConstant">'shiftjis'</span>, <span class="synIdentifier">$temp</span> );
|
|||
|
<span class="synIdentifier">$temp</span> = uri_escape(<span class="synIdentifier">$temp</span>);
|
|||
|
<span class="synIdentifier">$url</span> =~
|
|||
|
<span class="synStatement">s/</span><span class="synConstant">SearchPerfecture=</span><span class="synSpecial">(</span><span class="synConstant">[^</span><span class="synSpecial">\&</span><span class="synConstant">]</span><span class="synSpecial">+)</span><span class="synStatement">/</span><span class="synConstant">SearchPerfecture=</span><span class="synIdentifier">$temp</span><span class="synStatement">/</span>;
|
|||
|
<span class="synStatement">return</span> <span class="synIdentifier">$url</span>;
|
|||
|
}
|
|||
|
}
|
|||
|
];
|
|||
|
};
|
|||
|
<span class="synIdentifier">$result</span> = <span class="synIdentifier">$scraper</span>->scrape( URI-><span class="synStatement">new</span>(<span class="synIdentifier">$page</span>) );
|
|||
|
<span class="synStatement">foreach</span> <span class="synStatement">my</span> <span class="synIdentifier">$link</span> ( @{ <span class="synIdentifier">$result</span>->{links} } ) {
|
|||
|
get_info(<span class="synIdentifier">$link</span>);
|
|||
|
<span class="synStatement">sleep</span>(<span class="synConstant">3</span>);
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
<span class="synStatement">exit</span>;
|
|||
|
<span class="synComment"># ===================</span>
|
|||
|
<span class="synComment"># === sub routine ===</span>
|
|||
|
<span class="synComment"># ===================</span>
|
|||
|
<span class="synStatement">sub</span><span class="synIdentifier"> get_info </span>{
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$page</span> = <span class="synStatement">shift</span>;
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$scraper</span> = scraper {
|
|||
|
process <span class="synConstant">'//div[contains(@class, "Table01")]'</span>, <span class="synConstant">'stores[]'</span> => scraper {
|
|||
|
process <span class="synConstant">'//tr[1]/td[2]'</span>,
|
|||
|
<span class="synConstant">'store_name'</span> => [ <span class="synConstant">'TEXT'</span>,<span class="synIdentifier"> </span><span class="synStatement">sub</span><span class="synIdentifier"> </span>{ <span class="synStatement">s/</span><span class="synConstant">^</span><span class="synSpecial">\s+</span><span class="synStatement">//o</span>; <span class="synStatement">s/</span><span class="synSpecial">\s+</span><span class="synConstant">$</span><span class="synStatement">//o</span>; } ];
|
|||
|
process <span class="synConstant">'//tr[2]/td[2]'</span>, <span class="synConstant">'place'</span> => [
|
|||
|
<span class="synConstant">'TEXT'</span>,
|
|||
|
<span class="synIdentifier"> </span><span class="synStatement">sub</span><span class="synIdentifier"> </span>{
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$str</span> = <span class="synIdentifier">$_</span>;
|
|||
|
<span class="synStatement">if</span> ( <span class="synIdentifier">$str</span> =~<span class="synStatement"> /</span><span class="synSpecial">(\d\d\d)[--](\d\d\d\d)(.+</span><span class="synConstant">$</span><span class="synSpecial">)</span><span class="synStatement">/</span> ) {
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$post_code</span> = <span class="synConstant">"</span><span class="synIdentifier">$1</span><span class="synConstant">-</span><span class="synIdentifier">$2</span><span class="synConstant">"</span>;
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$address</span> = <span class="synIdentifier">$3</span>;
|
|||
|
<span class="synIdentifier">$address</span> =~ <span class="synStatement">s/</span><span class="synSpecial">\s</span><span class="synStatement">//g</span>;
|
|||
|
<span class="synStatement">return</span> {
|
|||
|
<span class="synConstant">'whole'</span> => <span class="synIdentifier">$str</span>,
|
|||
|
<span class="synConstant">'post_code'</span> => <span class="synIdentifier">$post_code</span>,
|
|||
|
<span class="synConstant">'address'</span> => <span class="synIdentifier">$address</span>
|
|||
|
};
|
|||
|
}
|
|||
|
}
|
|||
|
]; <span class="synComment"># 818-0042</span>
|
|||
|
process <span class="synConstant">'//tr[3]/td[2]'</span>,
|
|||
|
<span class="synConstant">'tel'</span> => [ <span class="synConstant">'TEXT'</span>,<span class="synIdentifier"> </span><span class="synStatement">sub</span><span class="synIdentifier"> </span>{ <span class="synStatement">s/</span><span class="synConstant">^</span><span class="synSpecial">\s+</span><span class="synStatement">//o</span>; <span class="synStatement">s/</span><span class="synSpecial">\s+</span><span class="synConstant">$</span><span class="synStatement">//o</span>; } ];
|
|||
|
process <span class="synConstant">'//tr[4]/td[2]'</span>, <span class="synConstant">'nearby_station'</span> => [
|
|||
|
<span class="synConstant">'TEXT'</span>,
|
|||
|
<span class="synIdentifier"> </span><span class="synStatement">sub</span><span class="synIdentifier"> </span>{
|
|||
|
<span class="synStatement">s/</span><span class="synConstant">^</span><span class="synSpecial">\s+</span><span class="synStatement">//o</span>;
|
|||
|
<span class="synStatement">s/</span><span class="synSpecial">\s+</span><span class="synConstant">$</span><span class="synStatement">//o</span>;
|
|||
|
}
|
|||
|
];
|
|||
|
process <span class="synConstant">'//tr[5]/td[2]'</span>,
|
|||
|
<span class="synConstant">'open_close'</span> => [ <span class="synConstant">'TEXT'</span>,<span class="synIdentifier"> </span><span class="synStatement">sub</span><span class="synIdentifier"> </span>{ <span class="synStatement">s/</span><span class="synConstant">^</span><span class="synSpecial">\s+</span><span class="synStatement">//o</span>; <span class="synStatement">s/</span><span class="synSpecial">\s+</span><span class="synConstant">$</span><span class="synStatement">//o</span>; } ];
|
|||
|
}
|
|||
|
};
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$res</span> = <span class="synIdentifier">$scraper</span>->scrape( URI-><span class="synStatement">new</span>(<span class="synIdentifier">$page</span>) );
|
|||
|
say YAML::Dump(<span class="synIdentifier">$res</span>);
|
|||
|
<span class="synStatement">foreach</span> <span class="synStatement">my</span> <span class="synIdentifier">$x</span> ( @{ <span class="synIdentifier">$res</span>->{<span class="synConstant">'stores'</span>} } ) {
|
|||
|
<span class="synComment"># データベースへの接続</span>
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$dbh</span> =
|
|||
|
DBI-><span class="synStatement">connect</span>( <span class="synConstant">'dbi:mysql:dbname=データベースの名前'</span>, <span class="synConstant">'ユーザ名'</span>, <span class="synConstant">'パスワード'</span>,
|
|||
|
{ <span class="synConstant">RaiseError </span>=> <span class="synConstant">1</span>, <span class="synConstant">AutoCommit </span>=> <span class="synConstant">0</span> } );
|
|||
|
<span class="synComment"># ステートメントハンドラの作成</span>
|
|||
|
<span class="synComment"># my $sth = $dbh->prepare("SELECT address FROM renoir WHERE address LIKE ?;");</span>
|
|||
|
<span class="synStatement">my</span> <span class="synIdentifier">$sth</span> = <span class="synIdentifier">$dbh</span>->prepare(
|
|||
|
<span class="synConstant">"INSERT INTO Starbucks (shopname, post_code, address, tel, hours, nearby) values (?, ?, ?, ?, ?, ?);"</span>
|
|||
|
);
|
|||
|
<span class="synIdentifier">$sth</span>->execute(
|
|||
|
<span class="synIdentifier">$x</span>->{<span class="synConstant">'store_name'</span>}, <span class="synIdentifier">$x</span>->{<span class="synConstant">'place'</span>}->{<span class="synConstant">'post_code'</span>},
|
|||
|
<span class="synIdentifier">$x</span>->{<span class="synConstant">'place'</span>}->{<span class="synConstant">'address'</span>}, <span class="synIdentifier">$x</span>->{<span class="synConstant">'tel'</span>},
|
|||
|
<span class="synIdentifier">$x</span>->{<span class="synConstant">'open_close'</span>}, <span class="synIdentifier">$x</span>->{<span class="synConstant">'nearby_station'</span>}
|
|||
|
);
|
|||
|
<span class="synComment"># ステートメントハンドラの解放</span>
|
|||
|
<span class="synIdentifier">$sth</span>->finish;
|
|||
|
<span class="synComment"># データベースハンドラの解放</span>
|
|||
|
<span class="synIdentifier">$dbh</span>->disconnect;
|
|||
|
}
|
|||
|
}
|
|||
|
</pre>
|
|||
|
</div>
|