Home » Blogs » Full Page Cache: Enterprise PageCache Model Crawler

Full Page Cache: Enterprise PageCache Model Crawler

How does the Enterprise_PageCache_Model_Crawler class work in Magento? (Magento ver. 1.13.0.2)

Object: $this => Enterprise_PageCache_Model_Crawler. The following constants are used in the above class:
Crawler Settings:

  • const XML_PATH_CRAWLER_ENABLED = 'system/page_crawl/enable';
  • const XML_PATH_CRAWLER_THREADS = 'system/page_crawl/threads';
  • const XML_PATH_CRAWL_MULTICURRENCY = 'system/page_crawl/multicurrency';

Crawler User Agent Name:

  • const USER_AGENT = 'MagentoCrawler';

The following variables are used in the above class:

  • $_visitedUrls
  • $_app
  • $_adapterFactory

Some important functions of this class and their purposes:

  • crawl() : Crawls all system URLs.
  • _executeRequests() : Prepares and executes requests for the given request_paths values.
  • _getCrawlerThreads() : Retrieves the number of crawler threads.
  • _isCrawlerEnabled() : Checks whether crawler is enabled for given store.

Functions in class Enterprise_PageCache_Model_Crawler

  • /**Initialize application, adapter factory
    * @param array $args*/
    public function __construct(array $args = array()) {}
  • /** Set resource model **/
    protected function _construct()
    {
    $this->_init('enterprise_pagecache/crawler');
    }
  • /**Get internal links from page content
    * @deprecated after 1.11.0.0
    * @param string $pageContent
    * @return array **/
    public function getUrls($pageContent){}
  • /** Get configuration for stores base urls.
    * array(
    * $index => array(
    * ‘store_id’ => $storeId,
    * ‘base_url’ => $url,
    * ‘cookie’ => $cookie))
    * @return array **/
    public function getStoresInfo(){}
  • /**Crawl all system urls
    * @return Enterprise_PageCache_Model_Crawler **/
    public function crawl(){}
  • /** Prepares and executes requests by given request_paths values
    * @param array $info
    * @param Varien_Http_Adapter_Curl $adapter **/
    protected function _executeRequests(array $info, Varien_Http_Adapter_Curl $adapter){}
  • /**Retrieves number of crawler threads
    * @param int $storeId
    * @return int **/
    protected function _getCrawlerThreads($storeId)
    {
    return (int)$this->_app->getStore($storeId)
    ->getConfig(self::XML_PATH_CRAWLER_THREADS);
    }
  • /**Checks whether crawler is enabled for given store
    * @param int $storeId
    * @return null|string */
    protected function _isCrawlerEnabled($storeId)
    {
    return (bool)(string)$this->_app->getStore($storeId)
    ->getConfig(self::XML_PATH_CRAWLER_ENABLED);
    }

Sample data for cron.php

Variables:
URL :
https://localhost/oscp-store/mage-ent-test/cron.php

  • $_visitedUrls = array

[0] $_app = Mage_Core_Model_App
$_areas = Array [1] global = Mage_Core_Model_App_Area
$_store = Mage_Core_Model_Store
$_underscoreCache = Array [7] IsDefault = is_default
DefaultGroupId = default_group_id
JobCode = job_code
ScheduledAt = scheduled_at
ExecutedAt = executed_at
Status = status
FinishedAt = finished_at
$_cacheTag = true
$_eventPrefix = store
$_eventObject = store
$_priceFilter =
$_website = Mage_Core_Model_Website
$_group = Mage_Core_Model_Store_Group
$_configCache =
$_configCacheBaseNodes = Array [10] $_dirCache = Array [0] $_urlCache = Array [0] $_baseUrlCache = Array [0] $_session =
$_isAdminSecure =
$_isFrontSecure =
$_frontendName =
$_isReadOnly = false
$_resourceName = core/store
$_resource =
$_resourceCollectionName = core/store_collection
$_dataSaveAllowed = true
$_isObjectNew =
$_data = Array [7] $_hasDataChanges = true
$_origData = Array [7] $_idFieldName =
$_isDeleted = false
$_oldFieldsMap = Array [0] $_syncFieldsMap = Array [0] $_website = Mage_Core_Model_Website
$_underscoreCache = Array [7] IsDefault = is_default
DefaultGroupId = default_group_id
JobCode = job_code
ScheduledAt = scheduled_at
ExecutedAt = executed_at
Status = status
FinishedAt = finished_at
$_cacheTag = true
$_eventPrefix = website
$_eventObject = website
$_configCache = Array [0] $_groups = Array [1] $_groupIds = Array [1] $_groupsCount = 1
$_stores = Array [3] $_storeIds = Array [3] $_storeCodes = Array [3] $_storesCount = 3
$_defaultGroup = Mage_Core_Model_Store_Group
$_defaultStore = Mage_Core_Model_Store
$_isCanDelete =
$_isReadOnly = false
$_resourceName = core/website
$_resource =
$_resourceCollectionName = core/website_collection
$_dataSaveAllowed = true
$_isObjectNew =
$_data = Array [10] $_hasDataChanges = true
$_origData = Array [10] $_idFieldName = website_id
$_isDeleted = false
$_oldFieldsMap = Array [0] $_syncFieldsMap = Array [0] $_locale =
$_translator =
$_design =
$_layout =
$_config = Mage_Core_Model_Config
$_useCache = false
$_cacheSections = Array [6] $_cacheLoadedSections = Array [0] $_options = Mage_Core_Model_Config_Options
$_classNameCache = Array [1] $_blockClassNameCache = Array [0] $_secureUrlCache = Array [0] $_distroServerVars =
$_substServerVars =
$_resourceModel = Mage_Core_Model_Resource_Config
$_eventAreas = Array [2] $_dirExists = Array [0] $_allowCacheForInit = true
$_cachePartsForSave = Array [0] $_prototype = Mage_Core_Model_Config_Base
$_isLocalConfigLoaded = true
$_baseDirCache = Array [0] $_customEtcDir =
$_canUseLocalModules = true
$_moduleNamespaces =
$_allowedModules = Array [0] $_xml = Mage_Core_Model_Config_Element
$_cacheId = config_global
$_cacheTags = Array [0] $_cacheLifetime =
$_cacheChecksum =
$_cacheSaved = false
$_cache =
$_elementClass = Mage_Core_Model_Config_Element
$_xpathExtends = //*[@extends] $_frontController =
$_cache = Mage_Core_Model_Cache
$_idPrefix = d4c_
$_frontend = Varien_Cache_Core
$_shmBackends = Array [6] $_defaultBackend = File
$_defaultBackendOptions = Array [4] $_requestProcessors = Array [1] $_disallowSave = false
$_allowedCacheOptions = Array [9] $_dbConnection = core_write
$_useCache =
$_websites = Array [6] 0 = Mage_Core_Model_Website
admin = Mage_Core_Model_Website
1 = Mage_Core_Model_Website
base = Mage_Core_Model_Website
2 = Mage_Core_Model_Website
private = Mage_Core_Model_Website
$_groups = Array [3] 0 = Mage_Core_Model_Store_Group
1 = Mage_Core_Model_Store_Group
2 = Mage_Core_Model_Store_Group
$_stores = Array [10] 0 = Mage_Core_Model_Store
admin = Mage_Core_Model_Store
1 = Mage_Core_Model_Store
default = Mage_Core_Model_Store
3 = Mage_Core_Model_Store
french = Mage_Core_Model_Store
2 = Mage_Core_Model_Store
german = Mage_Core_Model_Store
4 = Mage_Core_Model_Store
privatesales = Mage_Core_Model_Store
$_isSingleStore = false
$_isSingleStoreAllowed = true
$_currentStore = admin
$_request = Mage_Core_Controller_Request_Http
$_response =
$_events = Array [2] global = Array [10] crontab = Array [10] $_updateMode = false
$_useSessionInUrl = false
$_useSessionVar = false
$_isCacheLocked = false
$_adapterFactory = Enterprise_PageCache_Model_Adapter_Factory

Note: To debug crawler.php, I made some changes to the config.xml file, as shown below:

Path: app\code\core\Enterprise\PageCache\etc\config.xml

* * * * *

enterprise_pagecache/crawler::crawl

Leave A Comment