url |
^http://oreilly\.com/(?:pub/a/)?[^/]+/archive/ |
nextLink |
id('page-break')//p[@class='secondary']/b/following-sibling::a[1] |
pageElement |
(//div[@style='float:left;width:760px;padding-right:10px;']|id('article-content'))/*[not(@id='page-break')][(not(@class='secondary') and not(@class='breadcrumb')) or self::em/table]|//em[table]/*[not(@id='page-break')] |
exampleUrl |
http://oreilly.com/web2/archive/what-is-web-20.html
http://oreilly.com/ruby/archive/rails.html
http://oreilly.com/pub/a/ruby/archive/rails.html?page=2
http://oreilly.com/pub/a/java/archive/what-is-a-portlet.html |