Merlijn Bijenveld · Bas Pijls-van Kooten · d1b4a0c4
--- a/md_validators/image_validator.py 0 → 100644

+ 60

− 0
+++ b/md_validators/image_validator.py 0 → 100644

+ 60

− 0
+import os
+import re
+import requests
+from md_validators.md_validator import MDValidator
+
+
+class ImageValidator(MDValidator):
+    def __init__(self):
+        super().__init__()
+        self.report_file_name = "Image_Summary.md"
+        self.report_content = "# Image Summary\n\n"
+
+    def validate(self, file_path, file_content):
+        """ Validate the file content for Images """
+        images_with_lines = self.extract_links(file_content)
+        if images_with_lines:
+            print(f"Validating image links in: {file_path}")  
+            file_name = os.path.basename(file_path)
+            markdown = f"## [{file_name}]({file_path})\n\n| URL/Path | Line | Valid |\n| --- | ---- | ----- |\n"
+            for url, line in images_with_lines:
+                print(url)
+
+                validity = "Invalid"
+
+                try:
+                    validity = "Valid" if self.check_link_validity(url, file_path) else "Invalid"
+                except requests.exceptions.MissingSchema:
+                    validity = "Invalid: Missing Schema"
+                except requests.exceptions.InvalidURL:
+                    validity = "Invalid URL"
+
+                markdown += f"| {url} | {line} | {validity} |\n"
+
+            self.report_content += markdown
+        print(self.report_content)
+
+    def extract_links(self, content):
+        urls_with_lines = []
+        regex = r'!\[.*?\]\((https?://[^\s)]+|(?:\.\./[^\s)]+\.(?:jpg|jpeg|png|gif)))\)'
+        for line_num, line in enumerate(content.split('\n'), start=1):
+            urls = re.findall(regex, line)
+            for url in urls:
+                urls_with_lines.append((url, line_num))
+        return urls_with_lines
+
+    def check_link_validity(self, url, base_path):
+        if re.match(r'https?://', url):
+            try:
+                response = requests.head(url, allow_redirects=True)
+                return response.status_code == 200
+            except requests.ConnectionError:
+                return False
+        else:
+            local_path = os.path.join(os.path.dirname(base_path), url)
+            return os.path.exists(local_path)
+        
+    def write_report(self):
+        with open(self.report_file_name, "w+") as report_file:
+            report_file.write(self.report_content)
+#
+\ No newline at end of file