Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
45 / 45
100.00% covered (success)
100.00%
7 / 7
CRAP
100.00% covered (success)
100.00%
1 / 1
ConvertsMarkdownToPlainText
100.00% covered (success)
100.00%
45 / 45
100.00% covered (success)
100.00%
7 / 7
19
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 execute
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 applyRegexTransformations
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
2
 applyStringTransformations
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
2
 removeTables
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
5
 removeBlockquotes
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 trimWhitespace
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
5
1<?php
2
3declare(strict_types=1);
4
5namespace Hyde\Framework\Actions;
6
7use function trim;
8use function rtrim;
9use function is_numeric;
10use function str_ends_with;
11use function str_starts_with;
12use function substr;
13use function explode;
14use function implode;
15use function array_keys;
16use function str_replace;
17use function array_values;
18use function preg_replace;
19
/**
 * Strips Markdown syntax from a string, leaving plain text.
 *
 * The conversion runs in two passes: a sequence of regular expression
 * replacements applied to the whole document, followed by line-by-line
 * string cleanup (table syntax, blockquote markers, and whitespace).
 */
class ConvertsMarkdownToPlainText
{
    /*
     * Each constant below maps one or more regex patterns to the replacement
     * string used for it. They are applied in the order listed in
     * applyRegexTransformations(), and are resolved through late static
     * binding there.
     */
    protected const ATX_HEADERS = ['/^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} {0,}(\n)?\s{0,}$/m' => '$1$2$3'];
    protected const SETEXT_HEADERS = ['/\n={2,}/' => "\n"];
    protected const HORIZONTAL_RULES = ['/^(-\s*?|\*\s*?|_\s*?){3,}\s*/m' => ''];
    protected const HTML_TAGS = ['/<[^>]*>/' => ''];
    protected const CODE_BLOCKS = ['/(`{3,})(.*?)\1/m' => '$2'];
    protected const FENCED_CODEBLOCKS = ['/`{3}.*\n/' => '', '/`{3}/' => ''];
    protected const TILDE_FENCED_CODEBLOCKS = ['/~{3}.*\n/' => '', '/~{3}/' => ''];
    protected const INLINE_CODE = ['/`(.+?)`/' => '$1'];
    protected const IMAGES = ['/\!\[(.*?)\][\[\(].*?[\]\)]/' => '$1'];
    protected const INLINE_LINKS = ['/\[(.*?)\][\[\(].*?[\]\)]/' => '$1'];
    protected const REFERENCE_LINKS = ['/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/' => ''];
    protected const STRIKETHROUGH = ['/~~/' => ''];
    protected const BLOCKQUOTES = ['/^\s{0,3}>\s?/' => ''];
    protected const FOOTNOTES = ['/\[\^.+?\](\: .*?$)?/' => ''];
    protected const EMPHASIS = ['/([\*_]{1,3})(\S.*?\S{0,1})\1/' => '$2'];

    /** The emphasis rule is run a second time so that doubly wrapped emphasis is unwrapped too. */
    protected const DOUBLE_EMPHASIS = self::EMPHASIS;

    /** Collapses any run of two or more newlines into exactly two. */
    protected const REPEATED_NEWLINES = ['/\n{2,}/' => "\n\n"];

    /** The Markdown source that execute() converts. */
    protected string $markdown;

    /**
     * @param  string  $markdown  The Markdown text to convert.
     */
    public function __construct(string $markdown)
    {
        $this->markdown = $markdown;
    }

    /**
     * Convert the stored Markdown to plain text.
     *
     * Regex based on https://github.com/stiang/remove-markdown, licensed under MIT.
     *
     * @return string The text after the regex pass and the per-line string pass.
     */
    public function execute(): string
    {
        $withoutSyntax = $this->applyRegexTransformations($this->markdown);

        return $this->applyStringTransformations($withoutSyntax);
    }

    /**
     * Run every regex rule set over the document, in a fixed order.
     *
     * If preg_replace() reports a failure (returns null) for a rule set,
     * the text from before that rule set is kept and processing continues.
     */
    protected function applyRegexTransformations(string $markdown): string
    {
        /** @var array<array-key, array<string, string>> $ruleSets */
        $ruleSets = [
            static::ATX_HEADERS,
            static::SETEXT_HEADERS,
            static::HORIZONTAL_RULES,
            static::HTML_TAGS,
            static::CODE_BLOCKS,
            static::FENCED_CODEBLOCKS,
            static::TILDE_FENCED_CODEBLOCKS,
            static::INLINE_CODE,
            static::IMAGES,
            static::INLINE_LINKS,
            static::REFERENCE_LINKS,
            static::STRIKETHROUGH,
            static::BLOCKQUOTES,
            static::FOOTNOTES,
            static::EMPHASIS,
            static::DOUBLE_EMPHASIS,
            static::REPEATED_NEWLINES,
        ];

        foreach ($ruleSets as $rules) {
            $replaced = preg_replace(array_keys($rules), array_values($rules), $markdown);

            if ($replaced !== null) {
                $markdown = $replaced;
            }
        }

        return $markdown;
    }

    /**
     * Clean up each line individually: table syntax first, then blockquote
     * markers, then surrounding whitespace.
     */
    protected function applyStringTransformations(string $markdown): string
    {
        $cleaned = [];

        foreach (explode("\n", $markdown) as $row) {
            $row = $this->removeTables($row);
            $row = $this->removeBlockquotes($row);

            $cleaned[] = $this->trimWhitespace($row);
        }

        return implode("\n", $cleaned);
    }

    /**
     * Strip table syntax from a single line.
     *
     * @param  string  $contents  One line of text.
     * @return string The line without divider characters or cell pipes.
     */
    protected function removeTables(string $contents): string
    {
        // Divider rows (`|--` ... `--|`) lose every pipe and dash.
        $isDivider = str_starts_with($contents, '|--') && str_ends_with($contents, '--|');
        if ($isDivider) {
            $contents = str_replace(['|', '-'], '', $contents);
        }

        // Cell rows (`| ` ... `|`) lose their pipe separators and any trailing spaces.
        $isCellRow = str_starts_with($contents, '| ') && str_ends_with($contents, '|');
        if ($isCellRow) {
            $withoutPipes = str_replace(['| ', ' | ', ' |'], '', $contents);
            $contents = rtrim($withoutPipes, ' ');
        }

        return $contents;
    }

    /**
     * Strip leading blockquote markers from a single line.
     *
     * @param  string  $contents  One line of text.
     */
    protected function removeBlockquotes(string $contents): string
    {
        // Checked in order: a leading "> " is dropped first, then a bare ">"
        // if the (possibly already shortened) line still starts with one.
        $markers = ['> ' => 2, '>' => 1];

        foreach ($markers as $marker => $length) {
            if (str_starts_with($contents, $marker)) {
                $contents = substr($contents, $length);
            }
        }

        return $contents;
    }

    /**
     * Trim surrounding whitespace from a line, unless it looks like a list item.
     *
     * A line counts as a list item when its first non-whitespace character is
     * `-`, `*`, `+`, or numeric; such lines are returned untouched so their
     * indentation is kept.
     */
    protected function trimWhitespace(string $contents): string
    {
        $trimmed = trim($contents);
        $lead = substr($trimmed, 0, 1);

        return match (true) {
            $lead === '-', $lead === '*', $lead === '+', is_numeric($lead) => $contents,
            default => $trimmed,
        };
    }
}