Skip to content
Snippets Groups Projects

Adds image validator

Open Merlijn Bijenveld requested to merge image_validator into main
1 file
+ 60
0
Compare changes
  • Side-by-side
  • Inline
+ 60
0
import os
import re
import requests
from md_validators.md_validator import MDValidator
class ImageValidator(MDValidator):
    """Check Markdown image references and collect the results in a report.

    For every file passed to :meth:`validate`, each ``![alt](target)`` image
    whose target is an ``http(s)`` URL or a ``../``-relative
    jpg/jpeg/png/gif path is checked, and one Markdown table per file is
    appended to ``report_content``. :meth:`write_report` writes the
    accumulated report to ``report_file_name``.
    """

    # Seconds to wait on a remote host. Without a timeout ``requests`` waits
    # indefinitely, so one dead server could stall the whole validation run.
    _REQUEST_TIMEOUT = 10

    # Group 1 captures the image target: either an http(s) URL or a
    # ``../``-relative path ending in jpg/jpeg/png/gif. Other local paths
    # (e.g. ``./img.png`` or ``img/a.png``) are not matched by this pattern.
    _IMAGE_PATTERN = re.compile(
        r'!\[.*?\]\((https?://[^\s)]+|(?:\.\./[^\s)]+\.(?:jpg|jpeg|png|gif)))\)'
    )

    def __init__(self):
        """Initialise the base validator and start an empty image report."""
        super().__init__()
        self.report_file_name = "Image_Summary.md"
        self.report_content = "# Image Summary\n\n"

    def validate(self, file_path, file_content):
        """Validate the image references in one file and record the results.

        Args:
            file_path: Path of the Markdown file; used for the report heading
                and as the base for resolving relative image paths.
            file_content: Full text of the file.

        Nothing is added to the report when the file has no matching images.
        """
        images_with_lines = self.extract_links(file_content)
        if images_with_lines:
            print(f"Validating image links in: {file_path}")
            file_name = os.path.basename(file_path)
            # Collect the section in a list and join once instead of
            # growing a string with ``+=`` on every row.
            section = [
                f"## [{file_name}]({file_path})\n\n"
                "| URL/Path | Line | Valid |\n| --- | ---- | ----- |\n"
            ]
            for url, line in images_with_lines:
                print(url)
                try:
                    is_valid = self.check_link_validity(url, file_path)
                    validity = "Valid" if is_valid else "Invalid"
                except requests.exceptions.MissingSchema:
                    validity = "Invalid: Missing Schema"
                except requests.exceptions.InvalidURL:
                    validity = "Invalid URL"
                section.append(f"| {url} | {line} | {validity} |\n")
            self.report_content += "".join(section)
            print(self.report_content)

    def extract_links(self, content):
        """Return ``(target, line_number)`` pairs for the images in ``content``.

        Args:
            content: Markdown text to scan.

        Returns:
            A list of tuples in document order; line numbers are 1-based and
            count ``\\n``-separated lines.
        """
        return [
            (url, line_num)
            for line_num, line in enumerate(content.split('\n'), start=1)
            for url in self._IMAGE_PATTERN.findall(line)
        ]

    def check_link_validity(self, url, base_path):
        """Report whether an image target can be reached.

        Args:
            url: An ``http(s)`` URL, or a path relative to the directory of
                ``base_path``.
            base_path: Path of the Markdown file that references the image.

        Returns:
            For remote targets, ``True`` only when a ``HEAD`` request
            (following redirects) ends with status 200; connection failures,
            timeouts and redirect loops yield ``False``. For local targets,
            whether the resolved path exists.

        Raises:
            requests.exceptions.InvalidURL, requests.exceptions.MissingSchema:
                Propagated unchanged so that ``validate`` can label them.
        """
        if url.startswith(("http://", "https://")):
            try:
                response = requests.head(
                    url, allow_redirects=True, timeout=self._REQUEST_TIMEOUT
                )
            except (
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
                requests.exceptions.TooManyRedirects,
            ):
                # An unreachable or misbehaving host means the image cannot
                # be loaded; that is an invalid link, not a fatal error.
                return False
            return response.status_code == 200

        local_path = os.path.join(os.path.dirname(base_path), url)
        return os.path.exists(local_path)

    def write_report(self):
        """Write the accumulated report to ``report_file_name``.

        Any existing file of that name is overwritten.
        """
        # Explicit UTF-8 keeps non-ASCII URLs and paths from failing on
        # platforms whose default encoding cannot represent them.
        with open(self.report_file_name, "w", encoding="utf-8") as report_file:
            report_file.write(self.report_content)
#
\ No newline at end of file
Loading