# robots.txt for CECOM (cecom.army.mil)
# Directs search engine crawlers and references the sitemap.
#
# Formatting rule: one directive per line. A "#" comments out everything
# after it on the same line, so a directive must never share a line with a
# preceding comment or with another directive.

# Sitemap location (applies globally; not tied to any User-agent group)
Sitemap: https://cecom.army.mil/sitemap.xml

# ============================================================
# Standard rules for all crawlers
# ============================================================
# Comment-only lines (not blank lines) are used as separators inside this
# group, because some legacy parsers treat a blank line as the end of the
# group and would ignore every rule after it.
User-agent: *
#
# ============================================================
# DISALLOW: Gallery Pages (duplicate content across sections)
# ============================================================
Disallow: /CTSF/Gallery
Disallow: /HallofFame/Gallery
Disallow: /Home/Gallery
Disallow: /ILSC/Gallery
Disallow: /SEC/Gallery
Disallow: /USAISEC/Gallery
#
# ============================================================
# DISALLOW: Hall of Fame Duplicate Pages
# ============================================================
# NOTE(review): robots.txt path matching is case-sensitive. The gallery rule
# above is spelled /HallofFame/ while the rules below are spelled
# /Halloffame/ - confirm which casing the site actually serves and align
# (or list both casings) so none of these rules silently miss.
Disallow: /Halloffame/Hall_of_Fame_Links
Disallow: /Halloffame/Careers/
Disallow: /Halloffame/Recruiting_Events
Disallow: /Halloffame/Benefits
Disallow: /Halloffame/Hiring_Programs
Disallow: /Halloffame/FAQ
Disallow: /Halloffame/WLB
#
# ============================================================
# DISALLOW: Historian Photo Pages
# ============================================================
Disallow: /Historian/EPhotos
Disallow: /Historian/PPhotos
Disallow: /Historian/TTYPhotos
Disallow: /Historian/Order/
#
# ============================================================
# DISALLOW: Home Duplicate Pages
# ============================================================
Disallow: /Home/News
#
# ============================================================
# DISALLOW: ILSC Duplicate Pages
# ============================================================
Disallow: /ILSC/WLB
Disallow: /ILSC/Recruiting_Events
#
# ============================================================
# DISALLOW: SEC Duplicate Pages
# ============================================================
Disallow: /SEC/Careers/
Disallow: /SEC/Recruiting_Events
Disallow: /SEC/Benefits
Disallow: /SEC/Hiring_Programs
Disallow: /SEC/FAQ
#
# ============================================================
# DISALLOW: USAISEC Duplicate Pages
# ============================================================
Disallow: /USAISEC/Careers
Disallow: /USAISEC/Recruiting_Events
Disallow: /USAISEC/Benefits
Disallow: /USAISEC/Hiring_Programs
Disallow: /USAISEC/FAQ
Disallow: /USAISEC/WLB
#
# ============================================================
# DISALLOW: Error Pages & Admin
# ============================================================
Disallow: /Error/
Disallow: /secure/
Disallow: /admin/
Disallow: /api/
#
# ============================================================
# DISALLOW: File Types (documents, images, PDFs)
# ============================================================
# "*" matches any run of characters. There is no "$" end anchor, so each
# rule matches any URL that contains the extension (e.g. /*.doc also
# covers .docx, and query strings after the extension are still matched).
Disallow: /*.doc
Disallow: /*.jpeg
Disallow: /*.pdf
Disallow: /*.png
#
# ============================================================
# CRAWL SETTINGS
# ============================================================
# Both directives below are non-standard extensions (not part of RFC 9309).
# Crawlers that do not support them ignore them; Googlebot, for example,
# does not honor Crawl-delay.
#
# Crawl delay (optional - adds delay between requests, in seconds)
Crawl-delay: 1
# Request rate limiting (optional - 30 requests per 1 minute)
Request-rate: 30/1m
#
# ============================================================
# ALLOW SPECIFIC STATIC ASSETS
# ============================================================
# Note: These overrides are optional. They make explicit that the CSS,
# JavaScript, and web-font files used to render pages may be crawled.
# Images are NOT included here: .png and .jpeg are disallowed above.
# Patterns start with "/" as the protocol requires, matching the form
# used by the file-type Disallow rules.
Allow: /*.css
Allow: /*.js
Allow: /*.woff
Allow: /*.woff2
Allow: /*.ttf
#
# ============================================================
# DEFAULT: everything not disallowed above is crawlable
# ============================================================
# Kept as the LAST rule on purpose: parsers that apply the first matching
# rule would otherwise stop at "Allow: /" and never reach the Disallow
# rules. Longest-match parsers (RFC 9309) behave the same either way.
Allow: /