72 lines
4.7 KiB
Markdown
72 lines
4.7 KiB
Markdown
---
|
|
title: LWPをつかってみる
|
|
author: kazu634
|
|
date: 2008-09-04
|
|
wordtwit_post_info:
|
|
- 'O:8:"stdClass":13:{s:6:"manual";b:0;s:11:"tweet_times";i:1;s:5:"delay";i:0;s:7:"enabled";i:1;s:10:"separation";s:2:"60";s:7:"version";s:3:"3.7";s:14:"tweet_template";b:0;s:6:"status";i:2;s:6:"result";a:0:{}s:13:"tweet_counter";i:2;s:13:"tweet_log_ids";a:1:{i:0;i:4255;}s:9:"hash_tags";a:0:{}s:8:"accounts";a:1:{i:0;s:7:"kazu634";}}'
|
|
categories:
|
|
- LWP
|
|
- Perl
|
|
|
|
---
|
|
<div class="section">
|
|
<p>
|
|
Gaucheの方はとりあえず頭の働く土日へ…
|
|
</p>
|
|
|
|
<h4>
|
|
相対URLを絶対URLへ
|
|
</h4>
|
|
|
|
<pre class="syntax-highlight">
|
|
<span class="synComment"># === Libraries ===</span>
|
|
<span class="synStatement">use strict</span>;
|
|
<span class="synStatement">use warnings</span>;
|
|
<span class="synComment"># LWP module</span>
|
|
<span class="synStatement">use </span>LWP <span class="synConstant">5.64</span>;
|
|
<span class="synComment"># Character Encoding</span>
|
|
<span class="synStatement">use </span>Encode;
|
|
<span class="synStatement">use utf8</span>;
|
|
<span class="synStatement">binmode</span>(<span class="synIdentifier">STDERR</span>, <span class="synConstant">':raw :encoding(utf8)'</span>);
|
|
<span class="synStatement">my</span> <span class="synIdentifier">$url</span> = <span class="synConstant">'http://www.cpan.org/RECENT.html'</span>;
|
|
<span class="synComment"># get the content of the URL.</span>
|
|
<span class="synStatement">my</span> <span class="synIdentifier">$browser</span> = LWP::UserAgent-><span class="synStatement">new</span>;
|
|
<span class="synStatement">my</span> <span class="synIdentifier">$response</span> = <span class="synIdentifier">$browser</span>->get(
|
|
<span class="synIdentifier">$url</span>,
|
|
<span class="synConstant">'User-Agent'</span> => <span class="synConstant">'Mozilla/4.77 [en] (Win98; U)'</span>,
|
|
<span class="synConstant">'Accept'</span> =>
|
|
<span class="synConstant">'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*'</span>,
|
|
<span class="synConstant">'Accept-Encoding'</span> => <span class="synConstant">'gzip'</span>,
|
|
<span class="synConstant">'Accept-Language'</span> => <span class="synConstant">'ja,en'</span>,
|
|
<span class="synConstant">'Accept-Charset'</span> => <span class="synConstant">'iso-8859-1, *, utf8'</span>,
|
|
);
|
|
<span class="synStatement">die</span> <span class="synConstant">"</span><span class="synIdentifier">$url</span><span class="synConstant"> を読み込めませんでした。"</span>, <span class="synIdentifier">$response</span>->status_line
|
|
<span class="synStatement">unless</span> <span class="synIdentifier">$response</span>->is_success;
|
|
<span class="synStatement">die</span> <span class="synConstant">"HTMLを読み込んだはずなのに、"</span>, <span class="synIdentifier">$response</span>->content_type,
|
|
<span class="synConstant">"が返ってきました。"</span>
|
|
<span class="synStatement">unless</span> <span class="synIdentifier">$response</span>-> content_type <span class="synStatement">eq</span> <span class="synConstant">'text/html'</span>;
|
|
<span class="synComment"># decoding.</span>
|
|
<span class="synComment"># Note how to use "decode":</span>
|
|
<span class="synComment"># decode($content's character code, the target string)</span>
|
|
<span class="synStatement">my</span> <span class="synIdentifier">$content</span> = decode(<span class="synConstant">'shiftjis'</span>, <span class="synIdentifier">$response</span>->content);
|
|
<span class="synComment"># ここで相対URLを絶対URLに変換している</span>
|
|
<span class="synStatement">while</span> ( <span class="synIdentifier">$content</span> =~ <span class="synStatement">m/</span><span class="synConstant"><A HREF=</span><span class="synSpecial">\"(.*?)\"</span><span class="synStatement">/g</span> ) {
|
|
<span class="synStatement">print</span>(<span class="synIdentifier">URI</span>->new_abs( <span class="synIdentifier">$1</span>, <span class="synIdentifier">$response</span>->base ), <span class="synConstant">"</span><span class="synSpecial">\n</span><span class="synConstant">"</span>);
|
|
}
|
|
</pre>
|
|
|
|
<p>
|
|
ここの
|
|
</p>
|
|
|
|
<pre class="syntax-highlight">
|
|
<span class="synStatement">while</span> ( <span class="synIdentifier">$content</span> =~ <span class="synStatement">m/</span><span class="synConstant"><A HREF=</span><span class="synSpecial">\"(.*?)\"</span><span class="synStatement">/g</span> ) {
|
|
<span class="synStatement">print</span>(<span class="synIdentifier">URI</span>->new_abs( <span class="synIdentifier">$1</span>, <span class="synIdentifier">$response</span>->base ), <span class="synConstant">"</span><span class="synSpecial">\n</span><span class="synConstant">"</span>);
|
|
}
|
|
</pre>
|
|
|
|
<p>
|
|
がポイントだよ。
|
|
</p>
|
|
</div>
|