// parseBaseAndProtein.ts

import { entreeMap } from "./categoryGroups";

/**
 * Category names for which protein extraction is attempted.
 *
 * Seeded with the keys of `entreeMap`. Add further category names
 * (e.g. Clover category names or high-level group names) here if their
 * items should also be parsed for proteins.
 */
const PROTEIN_PARSE_CATEGORIES = new Set<string>(Object.keys(entreeMap));
  
  /**
   * Category-gated wrapper around `parseBaseAndProtein`.
   *
   * @param itemName - The raw item name.
   * @param categoryName - The category the item belongs to; compared against
   *   `PROTEIN_PARSE_CATEGORIES` with an exact `Set.has` lookup.
   * @returns The result of `parseBaseAndProtein(itemName)` when the category
   *   is listed; otherwise `{ baseName: itemName }` with the name untouched
   *   and no `proteinName`.
   */
  export function parseBaseAndProteinWithCategoryCheck(
    itemName: string,
    categoryName: string
  ): {
    baseName: string;
    proteinName?: string;
  } {
    const isProteinCategory = PROTEIN_PARSE_CATEGORIES.has(categoryName);

    // Only listed categories go through protein extraction; everything
    // else keeps its full item name as the base name.
    return isProteinCategory
      ? parseBaseAndProtein(itemName)
      : { baseName: itemName };
  }

/**
 * Splits an item name into a base name and an optional protein name.
 *
 * Detection is case-insensitive and runs in two passes:
 *  1. Multi-token proteins (e.g. "Al Pastor", "Birria de Chivo") are looked
 *     up as substrings of the lower-cased name, in list order; the first
 *     list entry that occurs wins and the function returns immediately.
 *  2. Otherwise the name is split on whitespace and every token that exactly
 *     equals a known single-token protein is collected (several may match;
 *     they are joined with a space).
 *
 * In both passes a connector word "de" standing directly before the protein
 * is removed together with it, so "Quesadilla de Birria de Res" yields the
 * base name "Quesadilla" rather than "Quesadilla de".
 *
 * @param itemName - The raw item name.
 * @returns `baseName` is the name with the protein text removed and
 *   whitespace tidied; `proteinName` is the capitalized protein. When no
 *   known protein is found, `proteinName` is omitted and `baseName` is
 *   `itemName` unchanged.
 */
export function parseBaseAndProtein(itemName: string): {
    baseName: string;
    proteinName?: string;
  } {
    // Searched in this order, by plain substring containment.
    const multiTokenProteins = [
      'al pastor',
      'birria de res',
      'birria de chivo',
      '(no protein)'
    ];

    // Compared against whole whitespace-separated tokens.
    const singleTokenProteins = [
      'birria',
      'asada',
      'veggie',
      'chicken',
      'shrimp',
      'chivo',
      'pastor',
      'veggies'
    ];

    const lowerName = itemName.toLowerCase();

    // Matches a stand-alone "de" (plus surrounding whitespace) at the very
    // end of the text that precedes a protein.
    const trailingDeConnector = /(?:^|\s)de\s+$/;

    // --------------------------
    // Step A) Multi-token proteins
    // --------------------------
    for (const multi of multiTokenProteins) {
      const matchAt = lowerName.indexOf(multi);
      if (matchAt === -1) continue;

      // Widen the span to remove so it also covers a "de" connector sitting
      // directly before the protein. The widened span ends where the first
      // occurrence of `multi` ends, so it cannot occur any earlier in the
      // name and removeSubstring() cuts exactly this occurrence.
      const connector = trailingDeConnector.exec(lowerName.slice(0, matchAt));
      const spanStart = connector ? connector.index : matchAt;
      const spanToRemove = lowerName.slice(spanStart, matchAt + multi.length);

      const removed = removeSubstring(itemName, spanToRemove);

      // NOTE: capitalize() only upper-cases the first character of each
      // token, so "(no protein)" comes out as "(no Protein)".
      const proteinName = multi
        .split(' ')
        .map((t) => capitalize(t))
        .join(' ');

      return { baseName: cleanedUp(removed), proteinName };
    }

    // --------------------------
    // Step B) Single-token proteins
    // --------------------------
    const matchedProteins: string[] = [];
    const leftoverTokens: string[] = [];

    for (const tok of itemName.split(/\s+/)) {
      const lowTok = tok.toLowerCase();

      if (!singleTokenProteins.includes(lowTok)) {
        leftoverTokens.push(tok);
        continue;
      }

      // Drop a "de" that directly precedes the protein to avoid a base name
      // such as "Quesadilla De".
      const lastLeftover = leftoverTokens[leftoverTokens.length - 1];
      if (lastLeftover !== undefined && lastLeftover.toLowerCase() === 'de') {
        leftoverTokens.pop();
      }
      matchedProteins.push(capitalize(lowTok));
    }

    // No protein found: hand the name back untouched.
    if (matchedProteins.length === 0) {
      return { baseName: itemName };
    }

    return {
      baseName: cleanedUp(leftoverTokens.join(' ')),
      proteinName: cleanedUp(matchedProteins.join(' ')),
    };
}
  

    /**
     * Removes the first case-insensitive occurrence of `substringLower` from
     * `original`, leaving a single space in its place.
     *
     * The match is located in a lower-cased copy of `original`, and the match
     * offsets are then mapped back onto `original` one UTF-16 code unit at a
     * time. The mapping matters because lower-casing can change a string's
     * length (for example "İ" lower-cases to two code units); in that case
     * offsets into the lower-cased copy do not line up with `original`.
     *
     * @param original - The text to remove from; its casing is preserved.
     * @param substringLower - The text to remove, already lower-cased.
     * @returns `original` unchanged when there is no match; otherwise the text
     *   before the match, one space, and the text after the match.
     */
    function removeSubstring(original: string, substringLower: string): string {
        const lowerOriginal = original.toLowerCase();
        const lowerStart = lowerOriginal.indexOf(substringLower);
        if (lowerStart === -1) return original;
        const lowerEnd = lowerStart + substringLower.length;

        // Walk `original`, tracking how far each code unit advances the
        // lower-cased copy, to translate [lowerStart, lowerEnd) into offsets
        // that are valid for `original`.
        let start = original.length;
        let end = original.length;
        let startFound = false;
        let lowerPos = 0;
        for (let i = 0; i < original.length; i++) {
          if (!startFound && lowerPos >= lowerStart) {
            start = i;
            startFound = true;
          }
          if (lowerPos >= lowerEnd) {
            end = i;
            break;
          }
          lowerPos += original.charAt(i).toLowerCase().length;
        }

        // Keep one space where the match was so neighbouring words stay apart.
        return original.slice(0, start) + ' ' + original.slice(end);
      }
      
      /**
       * Upper-cases the first character of `word` and leaves the remainder
       * exactly as given. An empty string is returned as is.
       *
       * @param word - The token to capitalize.
       * @returns The token with its first character upper-cased.
       */
      function capitalize(word: string): string {
        return word
          ? word.charAt(0).toUpperCase() + word.substring(1)
          : word;
      }
      
      /**
       * Tidies whitespace: strips it from both ends, then collapses every run
       * of two or more whitespace characters into one space. A lone whitespace
       * character between words (including a single tab) is left as it is.
       *
       * @param str - The text to tidy.
       * @returns The tidied text.
       */
      function cleanedUp(str: string): string {
        const trimmed = str.trim();
        const pieces = trimmed.split(/\s{2,}/);
        return pieces.join(' ');
      }