// CSVParser.java
package mi.hdm.filesystem;

import mi.hdm.recipes.Ingredient;
import mi.hdm.recipes.Measurement;
import mi.hdm.recipes.NutritionTable;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.InvalidPropertiesFormatException;
import java.util.List;

public class CSVParser {
    private static final Logger log = LogManager.getLogger(CSVParser.class);

    /**
     * @param filepath Path where the CSV file is located
     * @param split    The character that the columns are split by
     * @param extract  Names of the columns to extract with extract[0] being the name of the ingredient
     * @return list of all ingredients inside the csv
     */
    public List<Ingredient> getIngredientsFromCSV(String filepath, char split, String... extract) throws IOException {
        log.info("Trying to read ingredients from CSV: {}", filepath);

        //try-block with automatic resource management
        try (BufferedReader reader = new BufferedReader(new FileReader(filepath))) {
            final String header = reader.readLine(); //read first line of CSV
            final int[] indexes = getColumnIndexes(header, split, extract);
            final List<Ingredient> ingredients = reader.lines()
                    .map(line -> getIngredientFromLine(line, split, indexes))
                    .toList();
            if (ingredients.size() > 0) {
                log.info("Found and parsed {} ingredients.", ingredients.size());
            } else {
                log.warn("No ingredients found in CSV, returning empty list.");
            }
        } catch (FileNotFoundException fileNotFoundException) {
            final Path filename = Path.of(filepath).getFileName();
            log.error("File '{}' not found at specified filepath: {}.", filename, filepath);
            fileNotFoundException.printStackTrace();
            throw fileNotFoundException;
        } catch (IOException io) {
            log.error("An error occurred while reading CSV: {}", filepath);
            throw io;
        }
    }

    private Ingredient getIngredientFromLine(String line, char split, int[] idx) throws NumberFormatException {
        log.debug("Trying to parse line {}", line);

        final Measurement measurement = Measurement.GRAM;
        final List<String> splitLine = splitLine(line, split);

        final String recipeName = splitLine.get(idx[0]);
        final List<Double> nutrition = new ArrayList<>();

        for (int i = 1; i < idx.length; i++) {
            String element = splitLine.get(idx[i]).split(" ")[0];
            double quantity = parseNumberFromString(element);
            if (getMeasurementFromString(element).equals("mg")) {
                quantity = quantity /1000;
            }
            nutrition.add(quantity);
        final NutritionTable nutritionTable = new NutritionTable(nutrition);
        return new Ingredient(measurement, recipeName, nutritionTable);
    }

    private int[] getColumnIndexes(String header, char splitChar, String... extract) throws InvalidPropertiesFormatException {
        final String[] split = header.split(String.valueOf(splitChar)); //split header of CSV
        //find indexes of columns that need to be extracted
        int[] colIndexes = new int[extract.length];
        for (int i = 0; i < extract.length; i++) {
            int idx = firstOccurrenceOf(split, extract[i]);
            if (idx < 0) {
                log.error("The specified column '{}' does not exist in the provided CSV file.", extract[i]);
                throw new InvalidPropertiesFormatException(String.format("Column %s does not exist in this CSV file.%n", extract[i]));
            }
            colIndexes[i] = idx;
        }
        return colIndexes;
    }

    /**
     * Returns the index of the first occurence of the specified element in the array
     *
     * @param arr     The array that should be searched through
     * @param element The element that should be identified in the array
     * @return the index of the first occurence of the specified element in the array, or -1 if it does not exist in the array
     */
    private <T> int firstOccurrenceOf(T[] arr, T element) {
        for (int i = 0; i < arr.length; i++) {
            if (arr[i].equals(element)) return i;
        }
        return -1;
    }

    /**
     * Get only the quantity of a nutrition value without its measurement
     *
     * @param candidate the nutrition value the quantity is extracted from
     * @return quantity of nutrition value as double (that is without measurement)
     */
    private double parseNumberFromString(String candidate) {
        if (candidate.isBlank()) return 0.0;

        StringBuilder numValue = new StringBuilder();
        boolean hasDot = false;
        for (int i = 0; i < candidate.length(); i++) {
            char c = candidate.charAt(i);
            if (Character.isDigit(c)) {
                numValue.append(c);
            } else if (!hasDot && numValue.length() > 0 && c == '.') {
                hasDot = true;
                numValue.append(c);
            } else {
                break;
            }
        }
        return Double.parseDouble(numValue.toString());
    }

    /**
     * Get only the measurement of a nutrition value
     *
     * @param candidate the nutrition value the measurement is extracted from
     * @return measurement of nutrition value as String
     */
    private String getMeasurementFromString(String candidate) {
        if (candidate.isBlank()) return "";

        StringBuilder unit = new StringBuilder();
        for (int i = 0; i < candidate.length(); i++) {
            char c = candidate.charAt(i);
            if (!Character.isDigit(c) && !(c == ' ')) {
                unit.append(c);
            }
        }
        return unit.toString();
    }

    /**
     * Split a line of CSV in its individual tokens based on a character while accounting for strings inside of columns
     *
     * @return list of tokens inside the provided line
     */
    private List<String> splitLine(String line, char splitChar) {
        final List<String> output = new ArrayList<>();
        StringBuilder builder = new StringBuilder();

        boolean inQuotes = false;
        for (int i = 0; i < line.length(); i++) {
            final char c = line.charAt(i);
            if (c == '"') {
                inQuotes = !inQuotes;
            } else if (c == splitChar && !inQuotes) {
                output.add(builder.toString());
                builder = new StringBuilder();
            } else {
                builder.append(c);
            }
        }
        //when end is reached, dump the rest contents of string builder to the list (if not empty)
        if (!builder.isEmpty()) {
            output.add(builder.toString());
        }
        return output;
    }
}