150 lines
11 KiB
Markdown
150 lines
11 KiB
Markdown
|
---
|
||
|
title: LWPの使い方を習得中
|
||
|
author: kazu634
|
||
|
date: 2008-08-30
|
||
|
url: /2008/08/30/_1091/
|
||
|
wordtwit_post_info:
|
||
|
- 'O:8:"stdClass":13:{s:6:"manual";b:0;s:11:"tweet_times";i:1;s:5:"delay";i:0;s:7:"enabled";i:1;s:10:"separation";s:2:"60";s:7:"version";s:3:"3.7";s:14:"tweet_template";b:0;s:6:"status";i:2;s:6:"result";a:0:{}s:13:"tweet_counter";i:2;s:13:"tweet_log_ids";a:1:{i:0;i:4245;}s:9:"hash_tags";a:0:{}s:8:"accounts";a:1:{i:0;s:7:"kazu634";}}'
|
||
|
categories:
|
||
|
- LWP
|
||
|
- Perl
|
||
|
|
||
|
---
|
||
|
<div class="section">
|
||
|
<p>
|
||
|
最終目標は図書館蔵書検索をするスクリプトを作ること。
|
||
|
</p>
|
||
|
|
||
|
<h4>
|
||
|
指定したURLが指し示すものをダウンロードする
|
||
|
</h4>
|
||
|
|
||
|
<p>
|
||
|
LWP::Simpleを使うよ:
|
||
|
</p>
|
||
|
|
||
|
<pre class="syntax-highlight">
|
||
|
<span class="synComment"># === Libraries ===</span>
|
||
|
<span class="synStatement">use strict</span>;
|
||
|
<span class="synStatement">use warnings</span>;
|
||
|
<span class="synComment"># LWP & Encode modules</span>
|
||
|
<span class="synStatement">use </span>LWP::Simple;
|
||
|
<span class="synStatement">use utf8</span>;
|
||
|
<span class="synStatement">use </span>Encode;
|
||
|
<span class="synStatement">binmode</span>(<span class="synIdentifier">STDERR</span>, <span class="synConstant">':raw :encoding(utf8)'</span>);
|
||
|
<span class="synComment"># === Main part ===</span>
|
||
|
<span class="synComment"># Here set the url.</span>
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$url</span> = <span class="synConstant">"http://www.klnet.pref.kanagawa.jp/opac/OPP0200"</span>;
|
||
|
<span class="synComment"># get the content of the url.</span>
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$content</span> = get(<span class="synIdentifier">$url</span>);
|
||
|
<span class="synStatement">die</span> <span class="synConstant">"</span><span class="synIdentifier">$url</span><span class="synConstant"> を読み込めませんでした"</span> <span class="synStatement">unless</span> <span class="synStatement">defined</span> <span class="synIdentifier">$content</span>;
|
||
|
<span class="synComment"># decoding.</span>
|
||
|
<span class="synComment"># Note how to use "decode":</span>
|
||
|
<span class="synComment"># decode($content's character code, the target string)</span>
|
||
|
<span class="synIdentifier">$content</span> = decode(<span class="synConstant">'utf-8'</span>, <span class="synIdentifier">$content</span>);
|
||
|
<span class="synStatement">print</span>(<span class="synIdentifier">$content</span>);
|
||
|
</pre>
|
||
|
|
||
|
<h4>
|
||
|
LWP::UserAgent
|
||
|
</h4>
|
||
|
|
||
|
<pre class="syntax-highlight">
|
||
|
<span class="synComment"># === Libraries ===</span>
|
||
|
<span class="synStatement">use strict</span>;
|
||
|
<span class="synStatement">use warnings</span>;
|
||
|
<span class="synComment"># LWP & Encode modules</span>
|
||
|
<span class="synStatement">use </span>LWP <span class="synConstant">5.64</span>;
|
||
|
<span class="synStatement">use utf8</span>;
|
||
|
<span class="synStatement">use </span>Encode;
|
||
|
<span class="synStatement">binmode</span>(<span class="synIdentifier">STDERR</span>, <span class="synConstant">':raw :encoding(utf8)'</span>);
|
||
|
<span class="synComment"># === Main part ===</span>
|
||
|
<span class="synComment"># Here set the url.</span>
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$url</span> = <span class="synConstant">"http://www.klnet.pref.kanagawa.jp/opac/OPP0200"</span>;
|
||
|
<span class="synComment"># get the content of the url.</span>
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$browser</span> = LWP::UserAgent-><span class="synStatement">new</span>;
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$response</span> = <span class="synIdentifier">$browser</span>->get(<span class="synIdentifier">$url</span>);
|
||
|
<span class="synStatement">die</span> <span class="synConstant">"</span><span class="synIdentifier">$url</span><span class="synConstant"> を読み込めませんでした。"</span>, <span class="synIdentifier">$response</span>->status_line
|
||
|
<span class="synStatement">unless</span> <span class="synIdentifier">$response</span>->is_success;
|
||
|
<span class="synStatement">die</span> <span class="synConstant">"HTMLを読み込んだはずなのに、"</span>, <span class="synIdentifier">$response</span>->content_type,
|
||
|
<span class="synConstant">"が返ってきました。"</span>
|
||
|
<span class="synStatement">unless</span> <span class="synIdentifier">$response</span>-> content_type <span class="synStatement">eq</span> <span class="synConstant">'text/html'</span>;
|
||
|
<span class="synComment"># decoding.</span>
|
||
|
<span class="synComment"># Note how to use "decode":</span>
|
||
|
<span class="synComment"># decode($content's character code, the target string)</span>
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$content</span> = decode(<span class="synConstant">'utf8'</span>, <span class="synIdentifier">$response</span>->content);
|
||
|
<span class="synStatement">print</span> <span class="synIdentifier">$content</span>;
|
||
|
</pre>
|
||
|
|
||
|
<h4>
|
||
|
ブラウザ情報とかも一緒に送信するよ
|
||
|
</h4>
|
||
|
|
||
|
<pre class="syntax-highlight">
|
||
|
<span class="synStatement">use strict</span>;
|
||
|
<span class="synStatement">use warnings</span>;
|
||
|
<span class="synComment"># LWP module</span>
|
||
|
<span class="synStatement">use </span>LWP <span class="synConstant">5.64</span>;
|
||
|
<span class="synComment"># Character Encoding</span>
|
||
|
<span class="synStatement">use </span>Encode;
|
||
|
<span class="synStatement">use utf8</span>;
|
||
|
<span class="synStatement">binmode</span>( <span class="synIdentifier">STDERR</span>, <span class="synConstant">':raw :encoding(utf8)'</span> );
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$url</span> = <span class="synConstant">'http://tv.yahoo.co.jp/vhf/kanagawa/realtime.html'</span>;
|
||
|
<span class="synComment"># get the content of the url.</span>
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$browser</span> = LWP::UserAgent-><span class="synStatement">new</span>;
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$response</span> = <span class="synIdentifier">$browser</span>->get(
|
||
|
<span class="synIdentifier">$url</span>,
|
||
|
<span class="synConstant">'User-Agent'</span> => <span class="synConstant">'Mozilla/4.77 [en] (Win98; U)'</span>,
|
||
|
<span class="synConstant">'Accept'</span> =>
|
||
|
<span class="synConstant">'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*'</span>,
|
||
|
<span class="synConstant">'Accept-Encoding'</span> => <span class="synConstant">'gzip'</span>,
|
||
|
<span class="synConstant">'Accept-Language'</span> => <span class="synConstant">'ja,en'</span>,
|
||
|
<span class="synConstant">'Accept-Charset'</span> => <span class="synConstant">'iso-8859-1, *, utf8'</span>,
|
||
|
);
|
||
|
<span class="synStatement">die</span> <span class="synConstant">"</span><span class="synIdentifier">$url</span><span class="synConstant"> を読み込めませんでした。"</span>, <span class="synIdentifier">$response</span>->status_line
|
||
|
<span class="synStatement">unless</span> <span class="synIdentifier">$response</span>->is_success;
|
||
|
<span class="synStatement">die</span> <span class="synConstant">"HTMLを読み込んだはずなのに、"</span>, <span class="synIdentifier">$response</span>->content_type,
|
||
|
<span class="synConstant">"が返ってきました。"</span>
|
||
|
<span class="synStatement">unless</span> <span class="synIdentifier">$response</span>->content_type <span class="synStatement">eq</span> <span class="synConstant">'text/html'</span>;
|
||
|
<span class="synComment"># decoding.</span>
|
||
|
<span class="synComment"># Note how to use "decode":</span>
|
||
|
<span class="synComment"># decode($content's character code, the target string)</span>
|
||
|
<span class="synStatement">my</span> <span class="synIdentifier">$content</span> = decode( <span class="synConstant">'euc-jp'</span>, <span class="synIdentifier">$response</span>->content );
|
||
|
<span class="synStatement">print</span>(<span class="synIdentifier">$content</span>);
|
||
|
</pre>
|
||
|
|
||
|
<div class="hatena-asin-detail">
|
||
|
<a href="http://www.amazon.co.jp/dp/4873111870/?tag=hatena_st1-22&ascsubtag=d-7ibv" onclick="__gaTracker('send', 'event', 'outbound-article', 'http://www.amazon.co.jp/dp/4873111870/?tag=hatena_st1-22&ascsubtag=d-7ibv', '');"><img src="https://images-na.ssl-images-amazon.com/images/I/41AT4JG2KQL._SL160_.jpg" class="hatena-asin-detail-image" alt="Spidering hacks―ウェブ情報ラクラク取得テクニック101選" title="Spidering hacks―ウェブ情報ラクラク取得テクニック101選" /></a>
|
||
|
|
||
|
<div class="hatena-asin-detail-info">
|
||
|
<p class="hatena-asin-detail-title">
|
||
|
<a href="http://www.amazon.co.jp/dp/4873111870/?tag=hatena_st1-22&ascsubtag=d-7ibv" onclick="__gaTracker('send', 'event', 'outbound-article', 'http://www.amazon.co.jp/dp/4873111870/?tag=hatena_st1-22&ascsubtag=d-7ibv', 'Spidering hacks―ウェブ情報ラクラク取得テクニック101選');">Spidering hacks―ウェブ情報ラクラク取得テクニック101選</a>
|
||
|
</p>
|
||
|
|
||
|
<ul>
|
||
|
<li>
|
||
|
<span class="hatena-asin-detail-label">作者:</span> <a href="http://d.hatena.ne.jp/keyword/Kevin%20Hemenway" onclick="__gaTracker('send', 'event', 'outbound-article', 'http://d.hatena.ne.jp/keyword/Kevin%20Hemenway', 'Kevin Hemenway');" class="keyword">Kevin Hemenway</a>,<a href="http://d.hatena.ne.jp/keyword/Tara%20Calishain" onclick="__gaTracker('send', 'event', 'outbound-article', 'http://d.hatena.ne.jp/keyword/Tara%20Calishain', 'Tara Calishain');" class="keyword">Tara Calishain</a>,<a href="http://d.hatena.ne.jp/keyword/%C2%BC%BE%E5%B2%ED%BE%CF" onclick="__gaTracker('send', 'event', 'outbound-article', 'http://d.hatena.ne.jp/keyword/%C2%BC%BE%E5%B2%ED%BE%CF', '村上雅章');" class="keyword">村上雅章</a>
|
||
|
</li>
|
||
|
<li>
|
||
|
<span class="hatena-asin-detail-label">出版社/メーカー:</span> <a href="http://d.hatena.ne.jp/keyword/%A5%AA%A5%E9%A5%A4%A5%EA%A1%BC%A1%A6%A5%B8%A5%E3%A5%D1%A5%F3" onclick="__gaTracker('send', 'event', 'outbound-article', 'http://d.hatena.ne.jp/keyword/%A5%AA%A5%E9%A5%A4%A5%EA%A1%BC%A1%A6%A5%B8%A5%E3%A5%D1%A5%F3', 'オライリー・ジャパン');" class="keyword">オライリー・ジャパン</a>
|
||
|
</li>
|
||
|
<li>
|
||
|
<span class="hatena-asin-detail-label">発売日:</span> 2004/05
|
||
|
</li>
|
||
|
<li>
|
||
|
<span class="hatena-asin-detail-label">メディア:</span> 単行本
|
||
|
</li>
|
||
|
<li>
|
||
|
<span class="hatena-asin-detail-label">購入</span>: 52人 <span class="hatena-asin-detail-label">クリック</span>: 904回
|
||
|
</li>
|
||
|
<li>
|
||
|
<a href="http://d.hatena.ne.jp/asin/4873111870" onclick="__gaTracker('send', 'event', 'outbound-article', 'http://d.hatena.ne.jp/asin/4873111870', 'この商品を含むブログ (103件) を見る');" target="_blank">この商品を含むブログ (103件) を見る</a>
|
||
|
</li>
|
||
|
</ul>
|
||
|
</div>
|
||
|
|
||
|
<div class="hatena-asin-detail-foot">
|
||
|
</div>
|
||
|
</div>
|
||
|
</div>
|