ORG.oclc.resources.html
Class HTMLResource
java.lang.Object
|
+--ORG.oclc.resources.html.HTMLResource
- public class HTMLResource
- extends Object
- implements NetworkResource
title
protected String title
titleIsSet
protected boolean titleIsSet
copyRightIsSet
protected boolean copyRightIsSet
pubFromCRText
protected String pubFromCRText
dateFromCRText
protected String dateFromCRText
copyrightFromCRText
protected String copyrightFromCRText
excerpt
protected String excerpt
excerptIsSet
protected boolean excerptIsSet
content
protected String content
cleanContent
protected String cleanContent
cleanContentIsSet
protected boolean cleanContentIsSet
lowerCaseContent
protected String lowerCaseContent
lowerCaseContentIsSet
protected boolean lowerCaseContentIsSet
noTagsLength
protected int noTagsLength
noTagsNoAnchorsLength
protected int noTagsNoAnchorsLength
linkRatio
protected int linkRatio
linkRatioIsSet
protected boolean linkRatioIsSet
allContentChecksAreDone
protected boolean allContentChecksAreDone
containsMetaTagsIsSet
protected boolean containsMetaTagsIsSet
containsMetaTags
protected boolean containsMetaTags
containsXMLIsSet
protected boolean containsXMLIsSet
containsXML
protected boolean containsXML
containsFormIsSet
protected boolean containsFormIsSet
containsForm
protected boolean containsForm
containsFramesIsSet
protected boolean containsFramesIsSet
containsFrames
protected boolean containsFrames
containsImageMapIsSet
protected boolean containsImageMapIsSet
containsImageMap
protected boolean containsImageMap
containsLayersIsSet
protected boolean containsLayersIsSet
containsLayers
protected boolean containsLayers
containsStylesIsSet
protected boolean containsStylesIsSet
containsStyles
protected boolean containsStyles
containsTemporalCuesIsSet
protected boolean containsTemporalCuesIsSet
containsTemporalCues
protected boolean containsTemporalCues
containsNewsCuesIsSet
protected boolean containsNewsCuesIsSet
containsNewsCues
protected boolean containsNewsCues
containsTechnicalCuesIsSet
protected boolean containsTechnicalCuesIsSet
containsTechnicalCues
protected boolean containsTechnicalCues
containsCodeIsSet
protected boolean containsCodeIsSet
containsCode
protected boolean containsCode
looksLike404IsSet
protected boolean looksLike404IsSet
looksLike404
protected boolean looksLike404
processedTagsIsSet
protected boolean processedTagsIsSet
rdf
protected String rdf
oneoffs
protected Vector oneoffs
links
protected Vector links
metaTags
protected Vector metaTags
xmlTags
protected Vector xmlTags
descriptions
protected Vector descriptions
descriptionsIsSet
protected boolean descriptionsIsSet
languages
protected Vector languages
languagesIsSet
protected boolean languagesIsSet
startOfOpenTag
protected String startOfOpenTag
endOfOpenTag
protected String endOfOpenTag
openTag
protected String openTag
closeTag
protected String closeTag
HTMLResource
public HTMLResource(String doc)
getTitle
public String getTitle()
- Specified by:
- getTitle in interface NetworkResource
getPublisherFromText
public String getPublisherFromText()
- Specified by:
- getPublisherFromText in interface NetworkResource
getCopyrightFromText
public String getCopyrightFromText()
- Specified by:
- getCopyrightFromText in interface NetworkResource
getExcerpt
public String getExcerpt(boolean refresh)
getExcerpt
public String getExcerpt()
- Specified by:
- getExcerpt in interface NetworkResource
getExcerpt
public String getExcerpt(int limit,
boolean refresh)
getExcerpt
public String getExcerpt(int limit)
- Specified by:
- getExcerpt in interface NetworkResource
getContent
public String getContent()
- Specified by:
- getContent in interface NetworkResource
setLowerCaseContent
protected void setLowerCaseContent()
getLowerCaseContent
public String getLowerCaseContent()
- Specified by:
- getLowerCaseContent in interface NetworkResource
cleanChunk
public String cleanChunk(String chunk,
int len)
getCleanContent
public String getCleanContent()
- Specified by:
- getCleanContent in interface NetworkResource
getLinkRatio
public int getLinkRatio()
- Specified by:
- getLinkRatio in interface NetworkResource
toString
public String toString()
- Specified by:
- toString in interface NetworkResource
- Overrides:
- toString in class Object
containsCode
public boolean containsCode()
containsTechnicalCues
public boolean containsTechnicalCues()
containsTemporalCues
public boolean containsTemporalCues()
containsNewsCues
public boolean containsNewsCues()
containsMetaTags
public boolean containsMetaTags()
containsXML
public boolean containsXML()
containsForm
public boolean containsForm()
containsFrames
public boolean containsFrames()
containsImageMap
public boolean containsImageMap()
containsLayers
public boolean containsLayers()
containsStyles
public boolean containsStyles()
looksLike404
public boolean looksLike404()
containsPattern
public boolean containsPattern(String uncompiledPattern,
int limit)
doAllContentChecks
public void doAllContentChecks()
processTags
public void processTags()
getMetaTags
public Vector getMetaTags()
getXMLTags
public Vector getXMLTags()
getOneoffs
public Vector getOneoffs()
getLinks
public Vector getLinks()
getRDF
public String getRDF()
main
public static void main(String[] args)
getDescriptions
public Vector getDescriptions()
- Returns the content attributes of the meta tags whose name or
http-equiv attribute contains "description".
getLanguages
public Vector getLanguages()
- Returns the content attributes of the meta tags whose name or
http-equiv attribute contains "language".