62 lines
4.2 KiB
Markdown
62 lines
4.2 KiB
Markdown
---
|
||
title: 銀座ルノアールの店舗情報を取得
|
||
author: kazu634
|
||
date: 2009-06-21
|
||
wordtwit_post_info:
|
||
- 'O:8:"stdClass":13:{s:6:"manual";b:0;s:11:"tweet_times";i:1;s:5:"delay";i:0;s:7:"enabled";i:1;s:10:"separation";s:2:"60";s:7:"version";s:3:"3.7";s:14:"tweet_template";b:0;s:6:"status";i:2;s:6:"result";a:0:{}s:13:"tweet_counter";i:2;s:13:"tweet_log_ids";a:1:{i:0;i:4659;}s:9:"hash_tags";a:0:{}s:8:"accounts";a:1:{i:0;s:7:"kazu634";}}'
|
||
categories:
|
||
- Perl
|
||
|
||
---
|
||
<div class="section">
|
||
<p>
|
||
銀座ルノアールの店舗をGoogle Mapsにマッピングしようと思い、各店舗の住所情報を取得するPerlスクリプトを作成してみました(スターバックスはちょっと難しいので後でやります)。
|
||
</p>
|
||
|
||
<p>
|
||
Web::Scraperは便利だ♪
|
||
</p>
|
||
|
||
<pre class="syntax-highlight">
|
||
<span class="synComment"># === Libraries ===</span>
|
||
<span class="synStatement">use strict</span>;
|
||
<span class="synStatement">use warnings</span>;
|
||
<span class="synStatement">use </span>URI;
|
||
<span class="synStatement">use </span>Web::Scraper;
|
||
<span class="synStatement">use </span>YAML;
|
||
<span class="synStatement">use </span>Encode;
|
||
<span class="synStatement">use utf8</span>;
|
||
<span class="synStatement">my</span> <span class="synIdentifier">@address</span>;
|
||
<span class="synComment"># === Main part ===</span>
|
||
<span class="synStatement">my</span> <span class="synIdentifier">$frame</span> = scraper {
|
||
process <span class="synConstant">'//td[@class="line_a" and @bgcolor="#ffffff"]//a'</span>,
|
||
<span class="synConstant">'shop[]'</span> => <span class="synConstant">'@href'</span>;
|
||
};
|
||
<span class="synStatement">my</span> <span class="synIdentifier">$res</span> =
|
||
<span class="synIdentifier">$frame</span>->scrape( URI-><span class="synStatement">new</span>(<span class="synConstant">"http://www.ginza-renoir.co.jp/renoir/index.htm"</span>) );
|
||
<span class="synComment"># print encode('utf8', YAML::Dump($res->{shop}));</span>
|
||
<span class="synStatement">foreach</span> <span class="synStatement">my</span> <span class="synIdentifier">$x</span> ( @{ <span class="synIdentifier">$res</span>->{shop} } ) {
|
||
<span class="synStatement">my</span> <span class="synIdentifier">$main</span> = scraper {
|
||
process <span class="synConstant">'//td[@bgcolor="#ffffff" and @align="left"]'</span>,
|
||
<span class="synConstant">'shopinfo[]'</span> => <span class="synConstant">'TEXT'</span>;
|
||
};
|
||
<span class="synStatement">my</span> <span class="synIdentifier">$part</span> = scraper {
|
||
process
|
||
<span class="synConstant">'/html/body/center[2]/center/table/tbody/tr/td[2]/table/tbody/tr/td/table/tbody'</span>,
|
||
<span class="synConstant">'shop'</span> => <span class="synIdentifier">$main</span>;
|
||
result <span class="synConstant">'shop'</span>;
|
||
};
|
||
<span class="synStatement">my</span> <span class="synIdentifier">$result</span> = <span class="synIdentifier">$part</span>->scrape(
|
||
URI-><span class="synStatement">new</span>(<span class="synIdentifier">$x</span>) );
|
||
<span class="synStatement">foreach</span> <span class="synStatement">my</span> <span class="synIdentifier">$y</span> ( @{ <span class="synIdentifier">$result</span>->{shopinfo} } ) {
|
||
<span class="synStatement">push</span>(<span class="synIdentifier">@address</span>, encode(<span class="synConstant">'utf8'</span>, <span class="synIdentifier">$y</span>)) <span class="synStatement">if</span> (<span class="synIdentifier">$y</span> =~<span class="synStatement"> /</span><span class="synConstant">^東京都</span><span class="synStatement">/</span>);
|
||
<span class="synStatement">push</span>(<span class="synIdentifier">@address</span>, encode(<span class="synConstant">'utf8'</span>, <span class="synIdentifier">$y</span>)) <span class="synStatement">if</span> (<span class="synIdentifier">$y</span> =~<span class="synStatement"> /</span><span class="synConstant">^神奈川県</span><span class="synStatement">/</span>);
|
||
}
|
||
<span class="synStatement">foreach</span> (<span class="synIdentifier">@address</span>) {
|
||
<span class="synStatement">print</span>(<span class="synConstant">"</span><span class="synIdentifier">$_</span><span class="synSpecial">\n</span><span class="synConstant">"</span>);
|
||
}
|
||
<span class="synComment"># print encode( 'utf8', YAML::Dump($result) );</span>
|
||
}
|
||
</pre>
|
||
</div>
|