More checks for PRs

2025-07-13 07:23:54 +01:00 · 2021-03-23 18:32:45 -07:00 · 2021-03-23 18:32:45 -07:00 · 4af79b651f
commit 4af79b651f
parent e186559313
2 changed files with 181 additions and 93 deletions
--- a/.github/workflows/scripts/check-files.sh
+++ b/.github/workflows/scripts/check-files.sh
@ -3,12 +3,12 @@ set -eu

 SIZE_LIMIT=150000
 FAIL=0
+errMsgs=''

 check_size() {
 	size="$(stat --printf="%s" "$1")"
 	if [ "$size" -gt "$SIZE_LIMIT" ]; then
 		echo "File $1 is bigger than specified $SIZE_LIMIT limit"
-		FAIL=1
 	fi
 }

@ -23,7 +23,6 @@ check_file_name() {

 	if [ "$shouldname" != "$fileName" ]; then
 		echo "$1 should be named $shouldname."
-		FAIL=1
 	fi
 }

@ -36,116 +35,63 @@ check_recipe_name() {
 }

 check_recipe_content() {
-	errMsgs="$(awk '
-		BEGIN {
-			HAS_TITLE                   = 0;
-			HAS_TAGS                    = 0;
-			HAS_INVALID_TAGS            = 0;
-			NUM_TAGS                    = 0;
-			HAS_INGREDIENTS             = 0;
-			HAS_DIRECTIONS              = 0;
-			HAS_CONSECUTIVE_EMPTY_LINES = 0;
+	if file "$1" | grep -qF 'CRLF'; then
+		echo "Recipe has CRLF line endings. Please convert using dos2unix or similar tool.";

-			CONSECUTIVE_EMPTY_LINES = 0;
-		}
+		# Stop checking file. The rest of the checks will have lots of failures
+		# if we have CRLF line endings.
+		return;
+	fi

-		# First line should be the title
-		NR == 1 && /^# / {
-			HAS_TITLE = 1;
-			next;
-		}
-
-		$0 == "## Ingredients" {
-			HAS_INGREDIENTS = 1;
-		}
-
-		$0 == "## Directions" {
-			HAS_DIRECTIONS = 1;
-		}
-
-		$0 == "" {
-			CONSECUTIVE_EMPTY_LINES++
-			if (CONSECUTIVE_EMPTY_LINES >= 2) {
-				HAS_CONSECUTIVE_EMPTY_LINES = 1;
-			}
-		}
-
-		$0 != "" {
-			CONSECUTIVE_EMPTY_LINES = 0;
-		}
-
-		END {
-			# Last line should be the tags list
-			if ($1 == ";tags:") {
-				HAS_TAGS = 1;
-				NUM_TAGS = NF - 1;
-
-				# Loop through all the tags
-				for (i = 2; i <= NF; i++) {
-					# Make sure that each tag only contains lowercase letters and hyphens
-					if ($i !~ "^[a-z-]+$") {
-						HAS_INVALID_TAGS = 1;
-						break;
-					}
-				}
-			}
-
-			if (!HAS_TITLE) {
-				print "Recipe does not have a properly formatted title on the first line."
-			}
-
-			if (!HAS_TAGS) {
-				print "Recipe does not have a properly formatted tags on the last line."
-			} else {
-				if (HAS_INVALID_TAGS) {
-					print "Recipe has invalid tags. Tags must be separated by spaces and contain only lowercase letters or hyphens (-)";
-				}
-
-				if (NUM_TAGS < 2) {
-					print "Recipe only has " NUM_TAGS " tags. Add some more."
-				} else if (NUM_TAGS > 5) {
-					print "Recipe has " NUM_TAGS " tags which is too many. Remove some tags."
-				}
-			}
-
-			if (!HAS_INGREDIENTS) {
-				print "Recipe does not have an ingredients list."
-			}
-
-			if (!HAS_DIRECTIONS) {
-				print "Recipe does not have a directions section."
-			}
-
-			if (HAS_CONSECUTIVE_EMPTY_LINES) {
-				print "Recipe has at least 2 consecutive empty lines.";
-			}
-		}
-	' "$1")"
+	errMsgs="$(gawk -f '.github/workflows/scripts/parse_contents.awk'  "$1")"

 	if [ -n "$errMsgs" ]; then
 		echo "$errMsgs"
-		FAIL=1
 	fi
 }

+check_image() {
+	check_size "$1"
+	check_webp_name "$1"
+}
+
+check_recipe() {
+	check_recipe_name "$1"
+	check_recipe_content "$1"
+}
+
 while IFS= read -r file; do
-	echo "Checking '$file'"
+	# If the file doesn't exist, then the user probably deleted it, so don't
+	# check it.
+	# This will also ignore things that aren't files, like if someone adds a
+	# folder
+	if [ ! -f "$file" ]; then
+		continue;
+	fi
+
+	# Start every file with a clean slate. Without this reset, a file that
+	# matches one of the ignore patterns below (or no pattern at all) would
+	# be reported with the messages left over from the previously checked
+	# file, and would wrongly set FAIL.
+	errMsgs=''
+
 	case "$file" in
 		# Ignore these files
+		# Don't ignore all .md files in root, because otherwise we can't catch
+		# recipe files that are placed in the wrong place.
 		index.md) ;;
+		example.md) ;;
+		README.md) ;;
 		.github/*.md) ;;

 		*.webp)
-			check_size "$file"
-			check_webp_name "$file"
+			errMsgs="$(check_image "$file")"
 			;;
 		*.md)
-			check_recipe_name "$file"
-			check_recipe_content "$file"
+			errMsgs="$(check_recipe "$file")"
 			;;
 	esac
-	# Separate each file for easier reading.
-	echo ""
+
+	# Only print a section (and mark the run as failed) for files whose checks
+	# actually produced output.
+	if [ -n "$errMsgs" ]; then
+		echo "Errors Found in '$file'"
+		echo "$errMsgs"
+		echo ""
+		FAIL=1
+	fi
 done <<EOF
 $(git diff --name-only "$(git merge-base origin/master HEAD)")
 EOF
--- a/.github/workflows/scripts/parse_contents.awk
+++ b/.github/workflows/scripts/parse_contents.awk
@ -0,0 +1,142 @@
+#!/usr/bin/gawk -f
+
+# Check for Markdown titles
+/^# / {
+    if (NR == 1) {
+        HAS_VALID_TITLE++;
+    } else {
+        NUM_INVALID_TITLES++;
+    }
+}
+
+$0 == "## Ingredients" {
+    NUM_INGREDIENTS++;
+}
+
+$0 == "## Directions" {
+    NUM_DIRECTIONS++;
+}
+
+# Empty line
+$0 == "" {
+    # Count how many consecutive empty lines we have seen
+    CONSECUTIVE_EMPTY_LINES++
+
+    # If we have seen 2 or more then we are currently inside of a series of
+    # consecutive empty lines
+    if (INSIDE_CONSECUTIVE_EMPTY_LINES == 0 && CONSECUTIVE_EMPTY_LINES >= 2) {
+        NUM_CONSECUTIVE_EMPTY_LINES++;
+        INSIDE_CONSECUTIVE_EMPTY_LINES = 1;
+    }
+}
+
+# Any non-empty line ends the current run of empty lines, so reset both the
+# counter and the "inside a run" flag.
+$0 != "" {
+    CONSECUTIVE_EMPTY_LINES = 0;
+    INSIDE_CONSECUTIVE_EMPTY_LINES = 0;
+}
+
+# check lines with images. Some files have links to QR codes which I will
+# ignore. Those are not part of this repo though, so they have https:// URLs. So
+# ignore lines with http(s)
+/\.webp/ && $0 !~ /https?:\/\// {
+    # Name regexes, to make it easier to read
+    EVERYTHING_UNTIL_NEXT_BRACKET = "[^\\]]*";
+    EVERYTHING_UNTIL_NEXT_DOT = "[^.]+";
+    EVERYTHING_UNTIL_NEXT_QUOTE = "[^\"]+";
+
+    IMAGE_PATH = "pix/" EVERYTHING_UNTIL_NEXT_DOT "\\.webp";
+    OPTIONAL_TITLE = "( \"" EVERYTHING_UNTIL_NEXT_QUOTE "\")?";
+
+    IMAGE_REGEX = "!\\[" EVERYTHING_UNTIL_NEXT_BRACKET "\\]\\(" IMAGE_PATH OPTIONAL_TITLE "\\)";
+
+    # If this does not have a properly formatted markdown image reference
+    if ($0 !~ IMAGE_REGEX) {
+        NUM_INVALID_IMAGE_INCLUSION++;
+    } else {
+        # If we do have a properly formatted image reference, check to see if
+        # the image we are referencing actually exists.
+        match($0, "(" IMAGE_PATH ")", pathArr)
+        path = "data/" pathArr[1];
+
+        # The path is taken verbatim from the recipe text, which is untrusted
+        # PR content and may contain spaces or shell metacharacters. Wrap it
+        # in single quotes (escaping any embedded single quote) so the shell
+        # started by system() treats it as exactly one literal argument.
+        quotedPath = path;
+        gsub(/'/, "'\\''", quotedPath);
+        if (system("test -f '" quotedPath "'") != 0) {
+            print "Recipe loads the image '" path "', but no such image exists. Remove this reference, or add this image to the PR.";
+        }
+    }
+}
+
+# Matches lines that end with whitespace
+/\s$/ {
+    NUM_TRAILING_WHITESPACE++;
+}
+
+# Matches a tab character that is preceded by a non tab character (ie, matches a
+# tab that isn't used to indent a line)
+/[^\t]+\t/ {
+    NUM_NON_INDENTING_TAB++;
+}
+
+# Final report: validate the tags line and print one message per failed check.
+# Counters that were never incremented are still uninitialised here, so they
+# are forced to numbers with "+ 0" wherever they are printed; otherwise they
+# would show up as an empty string instead of 0.
+END {
+    # Last line should be the tags list
+    if ($1 == ";tags:") {
+        HAS_TAGS = 1;
+        NUM_TAGS = NF - 1;
+
+        # Loop through all the tags
+        # Start at field 2 because field 1 is ;tags:
+        for (field = 2; field <= NF; field++) {
+            # Make sure that each tag only contains lowercase letters and hyphens
+            if ($field !~ "^[a-z-]+$") {
+                NUM_INVALID_TAGS++;
+            }
+        }
+    }
+
+    if (!HAS_VALID_TITLE) {
+        print "Recipe does not have a properly formatted title on the first line."
+    }
+
+    if (NUM_INVALID_TITLES > 0) {
+        print "Recipe has " NUM_INVALID_TITLES " invalid titles. A title (line beginning with one '#') should only be on the first line of the recipe."
+    }
+
+    if (!HAS_TAGS) {
+        print "Recipe does not have a properly formatted tags on the last line."
+    } else {
+        if (NUM_INVALID_TAGS > 0) {
+            print "Recipe has " NUM_INVALID_TAGS " invalid tags. Tags must be separated by spaces and contain only lowercase letters or hyphens (-)";
+        }
+
+        if (NUM_TAGS < 2) {
+            print "Recipe only has " NUM_TAGS " tags. You need between 2 and 5."
+        } else if (NUM_TAGS > 5) {
+            print "Recipe has " NUM_TAGS " tags which is too many. You need between 2 and 5."
+        }
+    }
+
+    # "+ 0" makes a missing section print as "0" rather than as nothing.
+    if (NUM_INGREDIENTS != 1) {
+        print "Recipe has " (NUM_INGREDIENTS + 0) " ingredients sections. Every recipe must have exactly 1 ingredients section."
+    }
+
+    if (NUM_DIRECTIONS != 1) {
+        print "Recipe has " (NUM_DIRECTIONS + 0) " directions sections. Every recipe must have exactly 1 directions section."
+    }
+
+    if (NUM_CONSECUTIVE_EMPTY_LINES > 0) {
+        print "Recipe has " NUM_CONSECUTIVE_EMPTY_LINES " group(s) of consecutive empty lines.";
+    }
+
+    if (NUM_INVALID_IMAGE_INCLUSION == 1) {
+        print "Recipe has 1 image that isn't being included correctly. Look at other recipes to see how they are included. Note that the image path should NOT start with 'data/'";
+    } else if (NUM_INVALID_IMAGE_INCLUSION >= 2) {
+        print "Recipe has " NUM_INVALID_IMAGE_INCLUSION " images that aren't being included correctly. Look at other recipes to see how they are included. Note that the image path should NOT start with 'data/'";
+    }
+
+    if (NUM_TRAILING_WHITESPACE > 0) {
+        print "Recipe has " NUM_TRAILING_WHITESPACE " line(s) with trailing whitespace (spaces/tabs etc at the end of lines). Please remove them.";
+    }
+
+    if (NUM_NON_INDENTING_TAB > 0) {
+        print "Recipe has " NUM_NON_INDENTING_TAB " line(s) with tab characters that aren't at the beginning of the line. Please remove them."
+    }
+}