Skip to content

Commit 6692156

Browse files
pranitbauva1997Keats
authored and committed
exclude paginated pages in sitemap (#2555)
This fixes #2527.
1 parent d351098 commit 6692156

File tree

4 files changed

+34
-5
lines changed

4 files changed

+34
-5
lines changed

components/config/src/config/mod.rs

+18
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ pub enum Mode {
2929
Check,
3030
}
3131

32+
/// Selects which paginated pages are left out of the generated sitemap.
///
/// The variant names are (de)serialized in lowercase, so the configuration
/// file uses the strings `"none"` and `"all"`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
33+
#[serde(rename_all = "lowercase")]
34+
pub enum ExcludePaginatedPagesInSitemap {
35+
/// Do not exclude paginated pages; this is the value used by `Config::default()`.
None,
36+
/// Exclude every paginated page from the sitemap.
All,
37+
}
38+
3239
#[derive(Clone, Debug, Deserialize)]
3340
#[serde(default, deny_unknown_fields)]
3441
pub struct Config {
@@ -102,6 +109,8 @@ pub struct Config {
102109
pub generate_sitemap: bool,
103110
/// Enables the generation of robots.txt
104111
pub generate_robots_txt: bool,
112+
/// Which paginated pages to exclude from the sitemap; accepted values are "none" and "all"
113+
pub exclude_paginated_pages_in_sitemap: ExcludePaginatedPagesInSitemap,
105114
}
106115

107116
#[derive(Serialize)]
@@ -123,6 +132,7 @@ pub struct SerializedConfig<'a> {
123132
search: search::SerializedSearch<'a>,
124133
generate_sitemap: bool,
125134
generate_robots_txt: bool,
135+
exclude_paginated_pages_in_sitemap: ExcludePaginatedPagesInSitemap,
126136
}
127137

128138
impl Config {
@@ -287,6 +297,10 @@ impl Config {
287297
self.mode == Mode::Check
288298
}
289299

300+
/// Returns `true` when `exclude_paginated_pages_in_sitemap` is set to
/// `ExcludePaginatedPagesInSitemap::All`, i.e. when paginated pages must be
/// left out of the sitemap; returns `false` for any other value.
pub fn should_exclude_paginated_pages_in_sitemap(&self) -> bool {
301+
self.exclude_paginated_pages_in_sitemap == ExcludePaginatedPagesInSitemap::All
302+
}
303+
290304
pub fn enable_serve_mode(&mut self) {
291305
self.mode = Mode::Serve;
292306
}
@@ -340,6 +354,7 @@ impl Config {
340354
search: self.search.serialize(),
341355
generate_sitemap: self.generate_sitemap,
342356
generate_robots_txt: self.generate_robots_txt,
357+
exclude_paginated_pages_in_sitemap: self.exclude_paginated_pages_in_sitemap,
343358
}
344359
}
345360
}
@@ -405,6 +420,7 @@ impl Default for Config {
405420
extra: HashMap::new(),
406421
generate_sitemap: true,
407422
generate_robots_txt: true,
423+
exclude_paginated_pages_in_sitemap: ExcludePaginatedPagesInSitemap::None,
408424
}
409425
}
410426
}
@@ -1066,4 +1082,6 @@ base_url = "example.com"
10661082
let config = Config::parse(config).unwrap();
10671083
assert!(config.generate_robots_txt);
10681084
}
1085+
1086+
// TODO: add a test for excluding paginated pages
10691087
}

components/site/src/sitemap.rs

+7-5
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,12 @@ pub fn find_entries<'a>(
8383
}
8484

8585
if let Some(paginate_by) = s.paginate_by() {
86-
let number_pagers = (s.pages.len() as f64 / paginate_by as f64).ceil() as isize;
87-
for i in 1..=number_pagers {
88-
let permalink = format!("{}{}/{}/", s.permalink, s.meta.paginate_path, i);
89-
entries.insert(SitemapEntry::new(Cow::Owned(permalink), &None));
86+
if !config.should_exclude_paginated_pages_in_sitemap() {
87+
let number_pagers = (s.pages.len() as f64 / paginate_by as f64).ceil() as isize;
88+
for i in 1..=number_pagers {
89+
let permalink = format!("{}{}/{}/", s.permalink, s.meta.paginate_path, i);
90+
entries.insert(SitemapEntry::new(Cow::Owned(permalink), &None));
91+
}
9092
}
9193
}
9294
}
@@ -100,7 +102,7 @@ pub fn find_entries<'a>(
100102
for item in &taxonomy.items {
101103
entries.insert(SitemapEntry::new(Cow::Borrowed(&item.permalink), &None));
102104

103-
if taxonomy.kind.is_paginated() {
105+
if taxonomy.kind.is_paginated() && !config.should_exclude_paginated_pages_in_sitemap() {
104106
let number_pagers = (item.pages.len() as f64
105107
/ taxonomy.kind.paginate_by.unwrap() as f64)
106108
.ceil() as isize;

docs/content/documentation/getting-started/configuration.md

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ ignored_static = []
6767
# When set to "true", a feed is automatically generated.
6868
generate_feeds = false
6969

70+
# When set to "all", paginated pages are excluded from the sitemap. Defaults to "none".
71+
exclude_paginated_pages_in_sitemap = "none"
72+
7073
# The filenames to use for the feeds. Used as the template filenames, too.
7174
# Defaults to ["atom.xml"], which has a built-in template that renders an Atom 1.0 feed.
7275
# There is also a built-in template "rss.xml" that renders an RSS 2.0 feed.

docs/content/documentation/templates/pagination.md

+6
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ A paginated taxonomy gets two variables aside from the `paginator` variable:
5252

5353
See the [taxonomies page](@/documentation/templates/taxonomies.md) for a detailed version of the types.
5454

55+
## SEO
56+
57+
It is preferable to exclude paginated pages from the sitemap, since they are non-canonical pages.
58+
To exclude paginated pages from the sitemap, set the
59+
`exclude_paginated_pages_in_sitemap` option to `"all"` in `config.toml`.
60+
5561
## Example
5662

5763
Here is an example from a theme on how to use pagination on a page (`index.html` in this case):

0 commit comments

Comments
 (0)