2019-03-31 11:00:21 +00:00
---
title: スタバの店舗情報をDBに登録するスクリプト
author: kazu634
date: 2009-09-17
wordtwit_post_info:
- 'O:8:"stdClass":13:{s:6:"manual";b:0;s:11:"tweet_times";i:1;s:5:"delay";i:0;s:7:"enabled";i:1;s:10:"separation";s:2:"60";s:7:"version";s:3:"3.7";s:14:"tweet_template";b:0;s:6:"status";i:2;s:6:"result";a:0:{}s:13:"tweet_counter";i:2;s:13:"tweet_log_ids";a:1:{i:0;i:4783;}s:9:"hash_tags";a:0:{}s:8:"accounts";a:1:{i:0;s:7:"kazu634";}}'
categories:
- Perl
- starbucks
---
< div class = "section" >
<p>
ようやく完成しました。Web::Scraperって、奥が深いです。やろうと考えてから、実際にできあがるまで、かなり時間がたっているような気がする…が、あまり気にしないようにします。
</p>
<h4>
ソース
</h4>
< pre class = "syntax-highlight" >
< span class = "synPreProc" > #!/usr/bin/perl< / span >
< span class = "synStatement" > use strict< / span > ;
< span class = "synStatement" > use < / span > Web::Scraper;
< span class = "synStatement" > use < / span > URI;
< span class = "synStatement" > use < / span > URI::Escape;
< span class = "synStatement" > use utf8< / span > ;
< span class = "synStatement" > use < / span > YAML;
< span class = "synStatement" > use < / span > Perl6::Say;
< span class = "synStatement" > use < / span > Encode;
< span class = "synStatement" > use < / span > DBI;
< span class = "synComment" > # =========================< / span >
< span class = "synComment" > # === 各県へのURLを取得 ===< / span >
< span class = "synComment" > # =========================< / span >
< span class = "synComment" > # starbucksのURLを指定< / span >
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $uri< / span > = URI-> < span class = "synStatement" > new< / span > (< span class = "synConstant" > "http://www.starbucks.co.jp/search/index.html"< / span > );
< span class = "synComment" > # スクレイピングの設定を行う< / span >
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $scraper< / span > = scraper {
process < span class = "synConstant" > '//area[@href=~/.+SearchPerfecture/]'< / span > , < span class = "synConstant" > 'prefs[]'< / span > => [
< span class = "synConstant" > '@href'< / span > ,
< span class = "synIdentifier" > < / span > < span class = "synStatement" > sub< / span > < span class = "synIdentifier" > < / span > {
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $url< / span > = < span class = "synIdentifier" > $_< / span > -> as_string;
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > @url_split< / span > = < span class = "synStatement" > split< / span > ( /=/, < span class = "synIdentifier" > $url< / span > );
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $utf8_encode< / span > = < span class = "synStatement" > pop< / span > (< span class = "synIdentifier" > @url_split< / span > );
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $utf8< / span > = uri_unescape(< span class = "synIdentifier" > $utf8_encode< / span > );
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $temp< / span > = decode( < span class = "synConstant" > 'utf8'< / span > , < span class = "synIdentifier" > $utf8< / span > );
< span class = "synIdentifier" > $temp< / span > = encode( < span class = "synConstant" > 'shiftjis'< / span > , < span class = "synIdentifier" > $temp< / span > );
< span class = "synIdentifier" > $temp< / span > = uri_escape(< span class = "synIdentifier" > $temp< / span > );
< span class = "synStatement" > push< / span > ( < span class = "synIdentifier" > @url_split< / span > , < span class = "synIdentifier" > $temp< / span > );
< span class = "synStatement" > return< / span > < span class = "synStatement" > join< / span > ( < span class = "synConstant" > '='< / span > , < span class = "synIdentifier" > @url_split< / span > );
}
];
process < span class = "synConstant" > '//td[@class="SelectFromPlace"]//a'< / span > , < span class = "synConstant" > 'cities[]'< / span > => < span class = "synConstant" > '@href'< / span > ;
};
< span class = "synComment" > # スクレイピングの実行< / span >
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $result< / span > = < span class = "synIdentifier" > $scraper< / span > -> scrape(< span class = "synIdentifier" > $uri< / span > );
< span class = "synStatement" > foreach< / span > < span class = "synStatement" > my< / span > < span class = "synIdentifier" > $page< / span > ( @{ < span class = "synIdentifier" > $result< / span > -> {< span class = "synConstant" > 'cities'< / span > } } ) {
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $scraper< / span > = scraper {
process < span class = "synConstant" > '//map[@name=~/[^_]+_Map/]'< / span > , < span class = "synConstant" > 'city[]'< / span > => scraper {
process < span class = "synConstant" > '//area[@href=~/.+result_city3.php/]'< / span > , < span class = "synConstant" > 'shops[]'< / span > => [
< span class = "synConstant" > '@href'< / span > ,
< span class = "synIdentifier" > < / span > < span class = "synStatement" > sub< / span > < span class = "synIdentifier" > < / span > {
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $url< / span > = < span class = "synIdentifier" > $_< / span > -> as_string;
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > @work< / span > ;
< span class = "synStatement" > if< / span > ( < span class = "synIdentifier" > $url< / span > =~
< span class = "synStatement" > /</ span >< span class = "synConstant" > SearchPerfecture=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)\& ;</ span >< span class = "synConstant" > SearchCity=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)\& ;</ span >< span class = "synConstant" > SearchCity2=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)</ span >< span class = "synConstant" > & </ span >< span class = "synStatement" > /</ span >
)
{
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $pref< / span > = uri_escape(
encode(
< span class = "synConstant" > 'shiftjis'< / span > , decode( < span class = "synConstant" > 'utf8'< / span > , uri_unescape(< span class = "synIdentifier" > $1< / span > ) )
)
);
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $city1< / span > = uri_escape(
encode(
< span class = "synConstant" > 'shiftjis'< / span > , decode( < span class = "synConstant" > 'utf8'< / span > , uri_unescape(< span class = "synIdentifier" > $2< / span > ) )
)
);
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $city2< / span > = uri_escape(
encode(
< span class = "synConstant" > 'shiftjis'< / span > , decode( < span class = "synConstant" > 'utf8'< / span > , uri_unescape(< span class = "synIdentifier" > $3< / span > ) )
)
);
< span class = "synIdentifier" > $url< / span > =~
< span class = "synStatement" > s/</ span >< span class = "synConstant" > SearchPerfecture=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)\& ;</ span >< span class = "synConstant" > SearchCity=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)\& ;</ span >< span class = "synConstant" > SearchCity2=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)</ span >< span class = "synConstant" > & </ span >< span class = "synStatement" > /</ span >< span class = "synConstant" > SearchPerfecture=</ span >< span class = "synIdentifier" > $pref</ span >< span class = "synConstant" > & SearchCity=</ span >< span class = "synIdentifier" > $city1</ span >< span class = "synConstant" > & SearchCity2=</ span >< span class = "synIdentifier" > $city2</ span >< span class = "synConstant" > & </ span >< span class = "synStatement" > /</ span > ;
< span class = "synStatement" > return< / span > < span class = "synIdentifier" > $url< / span > ;
}
}
];
}
};
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $result< / span > = < span class = "synIdentifier" > $scraper< / span > -> scrape(< span class = "synIdentifier" > $page< / span > );
< span class = "synStatement" > foreach< / span > < span class = "synStatement" > my< / span > < span class = "synIdentifier" > $city< / span > ( @{ < span class = "synIdentifier" > $result< / span > -> {< span class = "synConstant" > 'city'< / span > } } ) {
< span class = "synStatement" > foreach< / span > < span class = "synStatement" > my< / span > < span class = "synIdentifier" > $page< / span > ( @{ < span class = "synIdentifier" > $city< / span > -> {< span class = "synConstant" > 'shops'< / span > } } ) {
< span class = "synComment" > # その県の店舗数を取得< / span >
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $scraper< / span > = scraper {
process
< span class = "synConstant" > 'id("Body")/div[@class="Code"]/table[@class="H3Table01"]/tbody[1]/tr[2]/td[1]/strong[2]'< / span > ,
< span class = "synConstant" > 'number'< / span > => < span class = "synConstant" > 'TEXT'< / span > ;
};
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $result< / span > = < span class = "synIdentifier" > $scraper< / span > -> scrape( URI-> < span class = "synStatement" > new< / span > (< span class = "synIdentifier" > $page< / span > ) );
< span class = "synComment" > # 店舗数に応じて、対応を変える< / span >
< span class = "synStatement" > if< / span > ( < span class = "synConstant" > 0< / span > == < span class = "synIdentifier" > $result< / span > -> {< span class = "synConstant" > 'number'< / span > } ) {
< span class = "synComment" > # 店舗数が0なら、何もしない< / span >
< span class = "synStatement" > next< / span > ;
}
< span class = "synComment" > # 店舗数が10店舗以下の場合< / span >
< span class = "synComment" > # そのページにしか店舗情報が存在しないので、そのページから情報を取得< / span >
< span class = "synStatement" > elsif< / span > ( < span class = "synIdentifier" > $result< / span > -> {< span class = "synConstant" > 'number'< / span > } < < span class = "synConstant" > 10< / span > ) {
get_info(< span class = "synIdentifier" > $page< / span > );
< span class = "synStatement" > sleep< / span > (< span class = "synConstant" > 3< / span > );
}
< span class = "synComment" > # 店舗数が10店舗より多い場合、< / span >
< span class = "synComment" > # そのページ以外にも店舗情報が存在するので、< / span >
< span class = "synComment" > # まずは店舗情報へのリンクをすべて取得する。< / span >
< span class = "synStatement" > else< / span > {
< span class = "synIdentifier" > $scraper< / span > = scraper {
process
< span class = "synConstant" > 'id("Body")/div[@class="Code"]/table[@class="ResultNavi"]/tbody[1]/tr[1]/td[2]/a'< / span > ,
< span class = "synConstant" > 'links[]'< / span > => [
< span class = "synConstant" > '@href'< / span > ,
< span class = "synIdentifier" > < / span > < span class = "synStatement" > sub< / span > < span class = "synIdentifier" > < / span > {
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $url< / span > = < span class = "synIdentifier" > $_< / span > -> as_string;
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > @work< / span > ;
< span class = "synComment" > # http://www.starbucks.co.jp/search/result_city.php?SearchPerfecture=%93%8C%8B%9E%93s& SearchCity=%8D%60%8B%E6& storelist=11< / span >
< span class = "synStatement" > if< / span > ( < span class = "synIdentifier" > $url< / span > =~
< span class = "synStatement" > /</ span >< span class = "synConstant" > SearchPerfecture=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)\& ;</ span >< span class = "synConstant" > SearchCity=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)\& ;</ span >< span class = "synStatement" > /</ span >
)
{
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $pref< / span > = uri_escape(
encode(
< span class = "synConstant" > 'shiftjis'< / span > ,
decode( < span class = "synConstant" > 'utf8'< / span > , uri_unescape(< span class = "synIdentifier" > $1< / span > ) )
)
);
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $city< / span > = uri_escape(
encode(
< span class = "synConstant" > 'shiftjis'< / span > ,
decode( < span class = "synConstant" > 'utf8'< / span > , uri_unescape(< span class = "synIdentifier" > $2< / span > ) )
)
);
< span class = "synIdentifier" > $url< / span > =~
< span class = "synStatement" > s/</ span >< span class = "synConstant" > SearchPerfecture=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)\& ;</ span >< span class = "synConstant" > SearchCity=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)\& ;</ span >< span class = "synStatement" > /</ span >< span class = "synConstant" > SearchPerfecture=</ span >< span class = "synIdentifier" > $pref</ span >< span class = "synConstant" > & SearchCity=</ span >< span class = "synIdentifier" > $city</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synStatement" > /</ span > ;
< span class = "synStatement" > return< / span > < span class = "synIdentifier" > $url< / span > ;
}
}
];
};
< span class = "synIdentifier" > $result< / span > = < span class = "synIdentifier" > $scraper< / span > -> scrape( URI-> < span class = "synStatement" > new< / span > (< span class = "synIdentifier" > $page< / span > ) );
< span class = "synStatement" > foreach< / span > < span class = "synStatement" > my< / span > < span class = "synIdentifier" > $link< / span > ( @{ < span class = "synIdentifier" > $result< / span > -> {links} } ) {
get_info(< span class = "synIdentifier" > $link< / span > );
< span class = "synStatement" > sleep< / span > (< span class = "synConstant" > 3< / span > );
}
}
}
}
}
< span class = "synComment" > # 各県のリンクをたどって、店舗情報を取得する< / span >
< span class = "synStatement" > foreach< / span > < span class = "synStatement" > my< / span > < span class = "synIdentifier" > $page< / span > ( @{ < span class = "synIdentifier" > $result< / span > -> {prefs} } ) {
< span class = "synComment" > # その県の店舗数を取得< / span >
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $scraper< / span > = scraper {
process
< span class = "synConstant" > 'id("Body")/div[@class="Code"]/table[@class="H3Table01"]/tbody[1]/tr[2]/td[1]/strong[2]'< / span > ,
< span class = "synConstant" > 'number'< / span > => < span class = "synConstant" > 'TEXT'< / span > ;
};
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $result< / span > = < span class = "synIdentifier" > $scraper< / span > -> scrape( URI-> < span class = "synStatement" > new< / span > (< span class = "synIdentifier" > $page< / span > ) );
< span class = "synComment" > # 店舗数に応じて、対応を変える< / span >
< span class = "synStatement" > if< / span > ( < span class = "synConstant" > 0< / span > == < span class = "synIdentifier" > $result< / span > -> {< span class = "synConstant" > 'number'< / span > } ) {
< span class = "synComment" > # 店舗数が0なら、何もしない< / span >
< span class = "synStatement" > next< / span > ;
}
< span class = "synComment" > # 店舗数が10店舗以下の場合< / span >
< span class = "synComment" > # そのページにしか店舗情報が存在しないので、そのページから情報を取得< / span >
< span class = "synStatement" > elsif< / span > ( < span class = "synIdentifier" > $result< / span > -> {< span class = "synConstant" > 'number'< / span > } < < span class = "synConstant" > 10< / span > ) {
get_info(< span class = "synIdentifier" > $page< / span > );
< span class = "synStatement" > sleep< / span > (< span class = "synConstant" > 3< / span > );
}
< span class = "synComment" > # 店舗数が10店舗より多い場合、< / span >
< span class = "synComment" > # そのページ以外にも店舗情報が存在するので、< / span >
< span class = "synComment" > # まずは店舗情報へのリンクをすべて取得する。< / span >
< span class = "synStatement" > else< / span > {
< span class = "synIdentifier" > $scraper< / span > = scraper {
process
< span class = "synConstant" > 'id("Body")/div[@class="Code"]/table[@class="ResultNavi"]/tbody[1]/tr[1]/td[2]/a'< / span > ,
< span class = "synConstant" > 'links[]'< / span > => [
< span class = "synConstant" > '@href'< / span > ,
< span class = "synIdentifier" > < / span > < span class = "synStatement" > sub< / span > < span class = "synIdentifier" > < / span > {
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $url< / span > = < span class = "synIdentifier" > $_< / span > -> as_string;
< span class = "synStatement" > if</ span > ( < span class = "synIdentifier" > $url</ span > =~< span class = "synStatement" > /</ span >< span class = "synConstant" > SearchPerfecture=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)</ span >< span class = "synStatement" > /</ span > ) {
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $utf8< / span > = uri_unescape(< span class = "synIdentifier" > $1< / span > );
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $temp< / span > = decode( < span class = "synConstant" > 'utf8'< / span > , < span class = "synIdentifier" > $utf8< / span > );
< span class = "synIdentifier" > $temp< / span > = encode( < span class = "synConstant" > 'shiftjis'< / span > , < span class = "synIdentifier" > $temp< / span > );
< span class = "synIdentifier" > $temp< / span > = uri_escape(< span class = "synIdentifier" > $temp< / span > );
< span class = "synIdentifier" > $url< / span > =~
< span class = "synStatement" > s/</ span >< span class = "synConstant" > SearchPerfecture=</ span >< span class = "synSpecial" > (</ span >< span class = "synConstant" > [^</ span >< span class = "synSpecial" > \& ;</ span >< span class = "synConstant" > ]</ span >< span class = "synSpecial" > +)</ span >< span class = "synStatement" > /</ span >< span class = "synConstant" > SearchPerfecture=</ span >< span class = "synIdentifier" > $temp</ span >< span class = "synStatement" > /</ span > ;
< span class = "synStatement" > return< / span > < span class = "synIdentifier" > $url< / span > ;
}
}
];
};
< span class = "synIdentifier" > $result< / span > = < span class = "synIdentifier" > $scraper< / span > -> scrape( URI-> < span class = "synStatement" > new< / span > (< span class = "synIdentifier" > $page< / span > ) );
< span class = "synStatement" > foreach< / span > < span class = "synStatement" > my< / span > < span class = "synIdentifier" > $link< / span > ( @{ < span class = "synIdentifier" > $result< / span > -> {links} } ) {
get_info(< span class = "synIdentifier" > $link< / span > );
< span class = "synStatement" > sleep< / span > (< span class = "synConstant" > 3< / span > );
}
}
}
< span class = "synStatement" > exit< / span > ;
< span class = "synComment" > # ===================< / span >
< span class = "synComment" > # === sub routine ===< / span >
< span class = "synComment" > # ===================< / span >
< span class = "synStatement" > sub< / span > < span class = "synIdentifier" > get_info < / span > {
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $page< / span > = < span class = "synStatement" > shift< / span > ;
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $scraper< / span > = scraper {
process < span class = "synConstant" > '//div[contains(@class, "Table01")]'< / span > , < span class = "synConstant" > 'stores[]'< / span > => scraper {
process < span class = "synConstant" > '//tr[1]/td[2]'< / span > ,
< span class = "synConstant" > 'store_name'</ span > => [ < span class = "synConstant" > 'TEXT'</ span > ,< span class = "synIdentifier" > </ span >< span class = "synStatement" > sub</ span >< span class = "synIdentifier" > </ span > { < span class = "synStatement" > s/</ span >< span class = "synConstant" > ^</ span >< span class = "synSpecial" > \s+</ span >< span class = "synStatement" > //o</ span > ; < span class = "synStatement" > s/</ span >< span class = "synSpecial" > \s+</ span >< span class = "synConstant" > $</ span >< span class = "synStatement" > //o</ span > ; } ];
process < span class = "synConstant" > '//tr[2]/td[2]'< / span > , < span class = "synConstant" > 'place'< / span > => [
< span class = "synConstant" > 'TEXT'< / span > ,
< span class = "synIdentifier" > < / span > < span class = "synStatement" > sub< / span > < span class = "synIdentifier" > < / span > {
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $str< / span > = < span class = "synIdentifier" > $_< / span > ;
< span class = "synStatement" > if</ span > ( < span class = "synIdentifier" > $str</ span > =~< span class = "synStatement" > /</ span >< span class = "synSpecial" > (\d\d\d)[-- ](\d\d\d\d)(.+</ span >< span class = "synConstant" > $</ span >< span class = "synSpecial" > )</ span >< span class = "synStatement" > /</ span > ) {
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $post_code< / span > = < span class = "synConstant" > " < / span > < span class = "synIdentifier" > $1< / span > < span class = "synConstant" > -< / span > < span class = "synIdentifier" > $2< / span > < span class = "synConstant" > " < / span > ;
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $address< / span > = < span class = "synIdentifier" > $3< / span > ;
< span class = "synIdentifier" > $address</ span > =~ < span class = "synStatement" > s/</ span >< span class = "synSpecial" > \s</ span >< span class = "synStatement" > //g</ span > ;
< span class = "synStatement" > return< / span > {
< span class = "synConstant" > 'whole'< / span > => < span class = "synIdentifier" > $str< / span > ,
< span class = "synConstant" > 'post_code'< / span > => < span class = "synIdentifier" > $post_code< / span > ,
< span class = "synConstant" > 'address'< / span > => < span class = "synIdentifier" > $address< / span >
};
}
}
]; < span class = "synComment" > # 818- 0042< / span >
process < span class = "synConstant" > '//tr[3]/td[2]'< / span > ,
< span class = "synConstant" > 'tel'</ span > => [ < span class = "synConstant" > 'TEXT'</ span > ,< span class = "synIdentifier" > </ span >< span class = "synStatement" > sub</ span >< span class = "synIdentifier" > </ span > { < span class = "synStatement" > s/</ span >< span class = "synConstant" > ^ </ span >< span class = "synSpecial" > \s+</ span >< span class = "synStatement" > //o</ span > ; < span class = "synStatement" > s/</ span >< span class = "synSpecial" > \s+</ span >< span class = "synConstant" > $</ span >< span class = "synStatement" > //o</ span > ; } ];
process < span class = "synConstant" > '//tr[4]/td[2]'< / span > , < span class = "synConstant" > 'nearby_station'< / span > => [
< span class = "synConstant" > 'TEXT'< / span > ,
< span class = "synIdentifier" > < / span > < span class = "synStatement" > sub< / span > < span class = "synIdentifier" > < / span > {
< span class = "synStatement" > s/</ span >< span class = "synConstant" > ^ </ span >< span class = "synSpecial" > \s+</ span >< span class = "synStatement" > //o</ span > ;
< span class = "synStatement" > s/</ span >< span class = "synSpecial" > \s+</ span >< span class = "synConstant" > $</ span >< span class = "synStatement" > //o</ span > ;
}
];
process < span class = "synConstant" > '//tr[5]/td[2]'< / span > ,
< span class = "synConstant" > 'open_close'</ span > => [ < span class = "synConstant" > 'TEXT'</ span > ,< span class = "synIdentifier" > </ span >< span class = "synStatement" > sub</ span >< span class = "synIdentifier" > </ span > { < span class = "synStatement" > s/</ span >< span class = "synConstant" > ^</ span >< span class = "synSpecial" > \s+</ span >< span class = "synStatement" > //o</ span > ; < span class = "synStatement" > s/</ span >< span class = "synSpecial" > \s+</ span >< span class = "synConstant" > $</ span >< span class = "synStatement" > //o</ span > ; } ];
}
};
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $res< / span > = < span class = "synIdentifier" > $scraper< / span > -> scrape( URI-> < span class = "synStatement" > new< / span > (< span class = "synIdentifier" > $page< / span > ) );
say YAML::Dump(< span class = "synIdentifier" > $res< / span > );
< span class = "synStatement" > foreach< / span > < span class = "synStatement" > my< / span > < span class = "synIdentifier" > $x< / span > ( @{ < span class = "synIdentifier" > $res< / span > -> {< span class = "synConstant" > 'stores'< / span > } } ) {
< span class = "synComment" > # データベースへの接続< / span >
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $dbh< / span > =
DBI-> < span class = "synStatement" > connect< / span > ( < span class = "synConstant" > 'dbi:mysql:dbname=データベースの名前'< / span > , < span class = "synConstant" > 'ユーザ名'< / span > , < span class = "synConstant" > 'パスワード'< / span > ,
{ < span class = "synConstant" > RaiseError < / span > => < span class = "synConstant" > 1< / span > , < span class = "synConstant" > AutoCommit < / span > => < span class = "synConstant" > 0< / span > } );
< span class = "synComment" > # ステートメントハンドラの作成< / span >
< span class = "synComment" > # my $sth = $dbh-> prepare(" SELECT address FROM renoir WHERE address LIKE ?;" );< / span >
< span class = "synStatement" > my< / span > < span class = "synIdentifier" > $sth< / span > = < span class = "synIdentifier" > $dbh< / span > -> prepare(
< span class = "synConstant" > " INSERT INTO Starbucks (shopname, post_code, address, tel, hours, nearby) values (?, ?, ?, ?, ?, ?);" < / span >
);
< span class = "synIdentifier" > $sth< / span > -> execute(
< span class = "synIdentifier" > $x< / span > -> {< span class = "synConstant" > 'store_name'< / span > }, < span class = "synIdentifier" > $x< / span > -> {< span class = "synConstant" > 'place'< / span > }-> {< span class = "synConstant" > 'post_code'< / span > },
< span class = "synIdentifier" > $x< / span > -> {< span class = "synConstant" > 'place'< / span > }-> {< span class = "synConstant" > 'address'< / span > }, < span class = "synIdentifier" > $x< / span > -> {< span class = "synConstant" > 'tel'< / span > },
< span class = "synIdentifier" > $x< / span > -> {< span class = "synConstant" > 'open_close'< / span > }, < span class = "synIdentifier" > $x< / span > -> {< span class = "synConstant" > 'nearby_station'< / span > }
);
< span class = "synComment" > # ステートメントハンドラの解放< / span >
< span class = "synIdentifier" > $sth< / span > -> finish;
< span class = "synComment" > # データベースハンドラの解放< / span >
< span class = "synIdentifier" > $dbh< / span > -> disconnect;
}
}
< / pre >
2019-04-02 16:06:15 +00:00
< / div >