// (c) Stephen P. Morse, 2003

// Alphabet bounds and vowel letters for the Hebrew rule set.
// NOTE(review): none of these three names is read in this part of the file;
// presumably the shared soundex engine defined elsewhere consumes them -- confirm.
var firstLetter = ALEF,
    lastLetter = TAF,
    vowels = ALEF + AYIN + VAV; // concatenation of the three vowel letters

// Main coding table for Hebrew letter sequences.
//
// Each row is [letter sequence, code, code, code]: a string of one to three
// Hebrew letters followed by three code strings.
// NOTE(review): the three code columns presumably follow the Daitch-Mokotoff
// layout (start of name / before a vowel / any other position) and "999"
// presumably means "not coded" -- confirm against the soundex engine that
// consumes this table (not in this part of the file).
//
// Rows are ordered longest sequence first (three letters, then two, then
// single letters). NOTE(review): presumably the matcher takes the first row
// that fits, so keep this ordering when adding rows -- confirm.
var newrules = [
// three-letter sequences
[ZAYIN + DALET + ZAYIN, "2", "4", "4"],
[SAMEKH + TESS + SHIN, "2", "4", "4"],
[SAMEKH + TESS + ZAYIN, "2", "4", "4"],
[SAMEKH + TAF + ZAYIN, "2", "4", "4"],
[SAMEKH + TAF + SHIN, "2", "4", "4"],
[SHIN + TESS + SHIN, "2", "4", "4"],
[SHIN + TESS + ZAYIN, "2", "4", "4"],
[SHIN + TAF + SHIN, "2", "4", "4"],
[SHIN + TAF + ZAYIN, "2", "4", "4"],
[YUD + YUD + AYIN, "1", "1", "1"],
[YUD + YUD + HAY, "1", "1", "1"],
// two-letter sequences
[DALET + SAMEKH, "4", "4", "4"],
[DALET + SHIN, "4", "4", "4"],
[DALET + ZAYIN, "4", "4", "4"],
[KHESS + SAMEKH, "5", "54", "54"],
[TESS + SHIN, "4", "4", "4"],
[KHESS + SHIN, "5", "54", "54"],
[KAF + SAMEKH, "5", "54", "54"],
[KAF + SHIN, "5", "54", "54"],
[MEM + NUN, "66", "66", "66"],
[MEM + NUN2, "66", "66", "66"],
[NUN + MEM, "66", "66", "66"],
[NUN + MEM2, "66", "66", "66"],
[PAY + BAIS, "7", "7", "7"], // give me an example
[KUF + SAMEKH, "5", "54", "54"],
[KUF + SHIN, "5", "54", "54"],
[SAMEKH + DALET, "2", "43", "43"],
[SAMEKH + TESS, "2", "43", "43"],
[SAMEKH + TAF, "2", "43", "43"],
[SHIN + DALET, "2", "43", "43"],
[SHIN + TESS, "2", "43", "43"],
[SHIN + TAF, "2", "43", "43"],
[TAF + SHIN, "4", "4", "4"],
[ZAYIN + SHIN, "4", "4", "4"],
[ALEF + VAV, "0", "7", "999"],
[YUD + VAV, "1", "999", "999"],
[YUD + ALEF, "1", "1", "1"],
// single letters (including the final forms MEM2, NUN2, KHAF2, FAY2, TSADI2)
[ALEF, "0", "999", "999"],
[BAIS, "7", "7", "7"],
[GIMEL, "5", "5", "5"],
[DALET, "3", "3", "3"],
[HAY, "5", "5", "999"],
[VAV, "7", "7", "7"],
[ZAYIN, "4", "4", "4"],
[KHESS, "5", "5", "5"],
[TESS, "3", "3", "3"],
[YUD, "1", "1", "999"],
[KAF, "5", "5", "5"],
[KHAF2, "5", "5", "5"],
[LAMED, "8", "8", "8"],
[MEM, "6", "6", "6"],
[MEM2, "6", "6", "6"],
[NUN, "6", "6", "6"],
[NUN2, "6", "6", "6"],
[SAMEKH, "4", "4", "4"],
[AYIN, "0", "999", "999"],
[PAY, "7", "7", "7"],
[FAY2, "7", "7", "7"],
[TSADI, "4", "4", "4"],
[TSADI2, "4", "4", "4"],
[KUF, "5", "5", "5"],
[RAISH, "9", "9", "9"],
[SHIN, "4", "4", "4"],
[TAF, "3", "3", "3"],
];

// Now branching cases

// Alternate ("branching") codings for sequences that also appear in newrules
// with different codes. Rows use the same [sequence, code, code, code] layout.
// NOTE(review): presumably the soundex engine emits an extra soundex branch
// using these rows whenever one of the listed sequences occurs -- confirm
// against the engine, which is not in this part of the file.
//
// Each xnewruleslist* string names the sequences of the matching array,
// delimited by "!" characters; keep the string and the array in step when
// editing either one.
// NOTE(review): the exact delimiter grammar (single "!" vs. "!!") is defined
// by the consumer -- confirm before changing it.

// Ashkenazi pronunciation: same as the Sephardic set plus an alternate TAF coded "4".
var xnewrulesAshkenazi = [
[YUD + ALEF, "1", "999", "999"],
[YUD + VAV, "1", "1", "1"],
[VAV, "7", "999", "999"], // vowel VAV can never appear at the beginning of a word
[TAF, "4", "4", "4"],
];
var xnewruleslistAshkenazi = "!" + YUD + ALEF + "!" + YUD + VAV + "!" + VAV + "!!" + TAF + "!!";

// Sephardic pronunciation: no alternate coding for TAF.
var xnewrulesSephardic = [
[YUD + ALEF, "1", "999", "999"],
[YUD + VAV, "1", "1", "1"],
[VAV, "7", "999", "999"], // vowel VAV can never appear at the beginning of a word
];
var xnewruleslistSephardic = "!" + YUD + ALEF + "!" + YUD + VAV + "!" + VAV + "!!";

// Default selection: the Sephardic branching rules are active unless other
// code reassigns these two variables.
var xnewrules = xnewrulesSephardic;
var xnewruleslist = xnewruleslistSephardic;

///////////////////////////

// Normalizes VAV and double-YUD spellings in rawtext and returns the result.
//
// Scanning left to right, the following rewrites are applied:
//   - double VAV                                  -> BAIS (forced consonant)
//   - single VAV followed by MEM/MEM2/NUN/NUN2    -> AYIN (forced vowel)
//   - single VAV preceded by BAIS or PAY          -> AYIN (forced vowel)
//   - YUD YUD HAY at the very end of the text     -> left intact
//   - YUD YUD AYIN anywhere                       -> left intact
//   - any other double YUD                        -> AYIN (a vowel, never "YI")
// Every other character is copied through unchanged.
//
// rawtext: string to normalize
// returns: the normalized string ("" for empty input)
function FixVav(rawtext) {
  // Declared locally: assigning an undeclared `text` would create (or clobber)
  // a global and throws a ReferenceError in strict mode / ES modules.
  var text = "";
  var lastIndex = rawtext.length - 1;

  for (var i = 0; i <= lastIndex; i++) {
    var ch = rawtext.charAt(i);

    // charAt() returns "" for an out-of-range index, so the neighbours are
    // simply empty at either end of the string. Note that prevChar is taken
    // from the raw input, not from the already rewritten output.
    var prevChar = rawtext.charAt(i - 1);
    var nextChar = rawtext.charAt(i + 1);
    var nextNextChar = rawtext.charAt(i + 2);

    if (ch == VAV) {
      if (nextChar == VAV) {
        // double VAV: emit one consonant and skip the second VAV
        ch = BAIS;
        i++;
      } else if (nextChar == MEM || nextChar == MEM2 || nextChar == NUN || nextChar == NUN2) {
        // single VAV followed by MEM or NUN (regular or final form)
        ch = AYIN;
      } else if (prevChar == BAIS || prevChar == PAY) {
        // single VAV preceded by a BAIS or PAY
        ch = AYIN;
      }
    } else if (ch == YUD && nextChar == YUD) {
      if (i == lastIndex - 2 && nextNextChar == HAY) {
        // leave YUD YUD HAY at the end of the name intact and let the rule
        // table take care of it (it generates a 1)
        ch = YUD + YUD + HAY;
        i += 2;
      } else if (i <= lastIndex - 2 && nextNextChar == AYIN) {
        // leave YUD YUD AYIN intact and let the rule table take care of it
        // (it generates a 1)
        ch = YUD + YUD + AYIN;
        i += 2;
      } else {
        // treat every other double YUD as a vowel instead of "YI"
        ch = AYIN;
        i++;
      }
    }

    text += ch;
  }

  return text;
}

// Computes the soundex codes of rawtext with the dm duplicate-consonant rule
// suspended.
//
// The rule is suspended by forcing a vowel (AYIN) between adjacent
// non-vowel letters, which is probably the case in Hebrew. There are times
// when the rule should stay in force, so both spellings are tried: every
// word is expanded into all alternates with and without each inserted vowel,
// all alternates are passed to soundex() in one space-separated string, and
// the distinct resulting codes are kept.
// ALEF is not used as the inserted vowel because of the ALEF VAV rule in the
// branching case.
//
// rawtext:   one or more words separated by single spaces
// separator: string placed between the code lists of consecutive words
// returns:   for each word its distinct codes, sorted and joined by " ";
//            the per-word lists are joined by separator
function SoundexWithoutDuplicateConsonantRule(rawtext, separator) {
  // Every variable is declared locally. Leaving the loop counter or the
  // current word undeclared would make them globals, which throws in strict
  // mode and lets any callee that touches a same-named global corrupt the loop.
  var words = rawtext.split(" ");
  var combinedSoundex = "";
  var x;

  for (var y = 0; y < words.length; y++) {
    var text = words[y];

    // first create an array of all alternates for this word, with and
    // without intervening vowels
    var textArray = [""];
    for (var i = 0; i < text.length; i++) {
      var ch = text.charAt(i);
      if (ch < ALEF || ch > TAF) {
        continue; // not a Hebrew letter: drop it
      }
      for (x = 0; x < textArray.length; x++) {
        textArray[x] += ch;
      }

      var nextCh = text.charAt(i + 1);
      if (i != text.length - 1 && !IsVowel(ch) && !IsVowel(nextCh)) {
        if (ch == VAV && nextCh == VAV) {
          continue; // don't insert a vowel between two consecutive VAVs
        }
        // might want to be selective here, like doing it only for certain
        // letter combinations. As written, the number of alternates doubles
        // at every such letter pair.
        var alternateCount = textArray.length;
        for (x = 0; x < alternateCount; x++) {
          textArray[alternateCount + x] = textArray[x] + AYIN; // don't use alef -- see above
        }
      }
    }

    // generate the soundex of every alternate in a single call
    var rawSoundex = soundex(textArray.join(" "));

    // remove duplicated soundex codes: after sorting, equal codes are adjacent
    var soundexArray = rawSoundex.split(" ");
    soundexArray.sort();
    var uniqueCodes = [];
    for (x = 0; x < soundexArray.length; x++) {
      if (x == 0 || soundexArray[x] != soundexArray[x - 1]) {
        uniqueCodes.push(soundexArray[x]);
      }
    }
    var wordSoundex = uniqueCodes.join(" ");

    // combine with the soundex of any preceding words; no separator is added
    // while the combined result is still empty
    if (combinedSoundex != "") {
      combinedSoundex += separator;
    }
    combinedSoundex += wordSoundex;
  }

  return combinedSoundex;
}

// Reports whether value is one of the letters treated as a vowel when
// deciding where to insert a vowel between consonants: ALEF, AYIN or YUD.
// VAV is deliberately not in this set, unlike the file-level `vowels` string.
function IsVowel(value) {
  if (value == ALEF) {
    return true;
  }
  if (value == AYIN) {
    return true;
  }
  return value == YUD;
}

