public class RobotsManager
extends org.apache.manifoldcf.core.database.BaseTable
| Field | Type | Description |
|---|---|---|
| hostname | VARCHAR(255) | Primary Key |
| robotsdata | BLOB | |
| expirationtime | BIGINT | |
| Modifier and Type | Class and Description |
|---|---|
| protected static class | RobotsManager.HostDescription<br>This is the object description for a robots host object. |
| protected static class | RobotsManager.HostExecutor<br>This is the executor object for locating robots host objects. |
| protected static class | RobotsManager.Record<br>This class represents a record in a robots.txt file. |
| protected static class | RobotsManager.RobotsCacheClass<br>Cache class for robots. |
| protected static class | RobotsManager.RobotsData<br>This is a cached data item. |
| Modifier and Type | Field and Description |
|---|---|
| static java.lang.String | _rcsid |
| protected static java.lang.String | expirationField |
| protected static java.lang.String | hostField |
| protected static RobotsManager.RobotsCacheClass | robotsCacheClass |
| protected static java.lang.String | robotsField |
| Constructor and Description |
|---|
| RobotsManager(org.apache.manifoldcf.core.interfaces.IThreadContext tc, org.apache.manifoldcf.core.interfaces.IDBInterface database)<br>Constructor. |
| Modifier and Type | Method and Description |
|---|---|
| java.lang.Boolean | checkFetchAllowed(java.lang.String userAgent, java.lang.String hostName, long currentTime, java.lang.String pathString, org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities)<br>Read robots.txt data from the cache or from the database. |
| void | deinstall()<br>Uninstall the manager. |
| protected static boolean | doesPathMatch(java.lang.String path, int pathIndex, java.lang.String spec, int specIndex)<br>Recursive method for matching specification to path. |
| protected static boolean | doesPathMatch(java.lang.String path, java.lang.String spec)<br>Check if path matches specification. |
| protected static java.lang.String | getRobotsKey(java.lang.String hostName)<br>Construct a key which represents an individual host name. |
| void | install()<br>Install the manager. |
| protected static java.lang.String | makeReadable(java.lang.String inputString)<br>Convert a string from the robots file into a readable form that does NOT contain NUL characters (since postgresql does not accept those). |
| protected RobotsManager.RobotsData | readRobotsData(java.lang.String hostName, org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities)<br>Read robots data, if it exists. |
| void | writeRobotsData(java.lang.String hostName, long expirationTime, java.io.InputStream data)<br>Write robots.txt, replacing any existing row. |
addTableIndex, analyzeTable, beginTransaction, buildConjunctionClause, constructCountClause, constructDistinctOnClause, constructDoubleCastClause, constructOffsetLimitClause, constructRegexpClause, constructSubstringClause, endTransaction, findConjunctionClauseMax, getDatabaseCacheKey, getDBInterface, getMaxInClause, getMaxOrClause, getSleepAmt, getTableIndexes, getTableName, getTableSchema, getTransactionID, getWindowedReportMaxRows, makeTableKey, noteModifications, performAddIndex, performAlter, performCommit, performCreate, performDelete, performDrop, performInsert, performModification, performQuery, performQuery, performRemoveIndex, performUpdate, prepareRowForSave, readRow, reindexTable, signalRollback, sleepFor

public static final java.lang.String _rcsid
protected static RobotsManager.RobotsCacheClass robotsCacheClass
protected static final java.lang.String hostField
protected static final java.lang.String robotsField
protected static final java.lang.String expirationField
public RobotsManager(org.apache.manifoldcf.core.interfaces.IThreadContext tc,
org.apache.manifoldcf.core.interfaces.IDBInterface database)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Parameters:
tc - is the thread context.
database - is the database handle.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void install()
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void deinstall()
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

public java.lang.Boolean checkFetchAllowed(java.lang.String userAgent,
java.lang.String hostName,
long currentTime,
java.lang.String pathString,
org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Parameters:
hostName - is the host for which the data is desired.
currentTime - is the time of the check.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void writeRobotsData(java.lang.String hostName,
long expirationTime,
java.io.InputStream data)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException,
java.io.IOException
Parameters:
hostName - is the host.
expirationTime - is the time this data should expire.
data - is the robots data stream. May be null.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException
java.io.IOException

protected static java.lang.String getRobotsKey(java.lang.String hostName)
Parameters:
hostName - is the host name.

protected RobotsManager.RobotsData readRobotsData(java.lang.String hostName,
org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

protected static java.lang.String makeReadable(java.lang.String inputString)
protected static boolean doesPathMatch(java.lang.String path,
java.lang.String spec)
protected static boolean doesPathMatch(java.lang.String path,
int pathIndex,
java.lang.String spec,
int specIndex)