Замена прямых двойных кавычек типографскими кавычками

Я ищу способ заменять прямые кавычки "правильными" типографскими кавычками в пользовательском вводе.

Идея

Вот фрагмент, кратко демонстрирующий принцип:
У "правильных" кавычек есть открывающая “ и закрывающая ”, поэтому каждую прямую кавычку нужно заменять подходящим по смыслу знаком.

// On every keyup, rewrite the textarea content so that straight double quotes
// next to a space become typographic quotes.
$('#myInput').on("keyup", function(e) {
  // The below doesn't work when there is no space before or after the quote.
  // A quote preceded by a space opens a quotation: U+201C (“).
  this.value = this.value.replace(/ "/g, ' \u201C');
  // A quote followed by a space closes a quotation: U+201D (”).
  this.value = this.value.replace(/" /g, '\u201D ');
});

<!-- Loads jQuery 2.1.1 from the Google CDN; it provides the $ function used by the keyup handler. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<!-- Text area that the handler selects with #myInput. -->
<textarea id="myInput"></textarea>

Ответ 1

У меня есть решение, которое в конечном итоге соответствует всем моим потребностям.
Признаю, что оно намного сложнее решения TJ, которое может быть идеальным для простых случаев.

Помните, моя главная проблема заключалась в невозможности использования \b из-за акцентированных символов.
Я смог избавиться от этой проблемы, используя решение из этой темы:
Удаление акцентов/диакритических знаков в строке в JavaScript

После этого я использовал модифицированную функцию, очень вдохновленную ответом здесь...
Как заменить символ в конкретном индексе в JavaScript?

... и пришлось немало повозиться с регулярными выражениями, чтобы наконец прийти к этому решению:

// Sample input covering accented words, nested quotes, single quotes and
// unbalanced quotes. A template literal is required here: the text spans
// several lines and contains apostrophes, so a single-quoted literal cannot hold it.
let str_orig = `· I'm "happy" ! Ça y est, j'ai "osé", et mon "âme sœur" était au rendez-vous…
· The sign says: "Some text "some text" some text." and "Note the space here !"
⋅ "Inc"or"rect" quo"tes should " not be replaced.
· I said: "If it works on 'singles' too, I'd love it even more!"
word1" word2"
word1 word2"
"word1 word2
"word1" word2
"word1" word2"
"word1 word2"`;

// Thanks, exactly what I needed!
// Accent-free copy of the text: decompose (NFD), then drop the combining marks
// U+0300..U+036F so that accented letters become plain letters.
// NOTE(review): positions found in str_norm are later applied to str_orig, which
// presumably relies on every accented letter in str_orig being one precomposed
// character (so both strings have the same length) — confirm for real input.
const str_norm = str_orig.normalize('NFD').replace(/[\u0300-\u036f]/g, '');

// Thanks for inspiration
/**
 * Returns a copy of the string in which the quote at `index` is replaced by
 * its typographic counterpart.
 *
 * @param {number} index - Position of the quote character to replace.
 * @param {boolean} shift - false for an opening quote, true for a closing quote.
 * @returns {string} The new string; the receiver itself is not modified.
 */
String.prototype.replaceQuoteAt = function(index, shift) {
  // Indexed by offset: 0 = U+201C (“), 1 = U+2018 (‘), 2 = U+201D (”), 3 = U+2019 (’).
  const replacers = "\u201C\u2018\u201D\u2019";
  // Already-converted single quotes still count as single quotes, so a second
  // replacement at the same index cannot turn ‘ into a double ”.
  const isSingle = "'\u2018\u2019".includes(this[index]);
  const offset = (isSingle ? 1 : 0) + (shift ? 2 : 0);
  return this.slice(0, index) + replacers[offset] + this.slice(index + 1);
};

// Matching runs on the accent-free str_norm, and each hit is applied to str_orig
// at the same index. The loop variables are scoped to their loops, so nothing
// leaks into the global scope (an undeclared `match` throws in strict mode).

// Opening quote: not after a boundary, not before a space or at the end
const re_start = /(?!\b)["'](?!(\s|$))/gi;
for (const found of str_norm.matchAll(re_start)) {
  str_orig = str_orig.replaceQuoteAt(found.index, false);
}

// Closing quote: not at the beginning or after a space, not before a boundary
const re_end = /(?<!(^|\s))["'](?!\b)/gi;
for (const found of str_norm.matchAll(re_end)) {
  str_orig = str_orig.replaceQuoteAt(found.index, true);
}

console.log("Corrected: \n", str_orig);

Ответ 2

Ваш вариант работает во многих случаях, кроме тех, когда слово в кавычках стоит в самом начале или в самом конце предложения или строки.

Чтобы решить эту проблему, можно использовать альтернативу из утверждения начала (или конца) строки и пробела, захватить её в группу и подставить эту группу в замену:

// $1 is whatever preceded the quote (start of the text or a space); it is put
// back, followed by an opening U+201C (“).
this.value = this.value.replace(/(^| )"/g, '$1\u201C');
// $1 is whatever followed the quote (end of the text or a space); it is put
// back after a closing U+201D (”).
this.value = this.value.replace(/"($| )/g, '\u201D$1');

Альтернативы здесь — ^|␣ и $|␣ (где ␣ — пробел). Группа захвата будет равна "" (пустой строке), если сработало утверждение, или " ", если совпал пробел.

// On every keyup, rewrite the textarea content with typographic quotes.
$('#myInput').on("keyup", function(e) {
  // Straight apostrophes become typographic ones (U+2019, ’); an empty
  // replacement here would delete every apostrophe the user types.
  this.value = this.value.replace(/'/g, '\u2019');
  // Opening quote: at the start of the text or after a space. The captured
  // prefix ($1) is kept and followed by U+201C (“).
  this.value = this.value.replace(/(^| )"/g, '$1\u201C');
  // Closing quote: at the end of the text or before a space. U+201D (”) is
  // followed by the captured suffix ($1).
  this.value = this.value.replace(/"($| )/g, '\u201D$1');
});

<!-- Loads jQuery 2.1.1 from the Google CDN; it provides the $ function used by the keyup handler. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<!-- Text area that the handler selects with #myInput. -->
<textarea id="myInput"></textarea>

Ответ 3

Поэтому вместо замены регулярными выражениями я бы использовал простой цикл с балансировкой кавычек. Предполагается, что каждой кавычке соответствует парная, и когда пара находится, обе кавычки заменяются как пара.

Ниже приведена тестовая реализация этого подхода:

/**
 * Builds a new string in which `replacement` overwrites the characters that
 * start at `index`; as many characters are overwritten as `replacement` is long.
 *
 * @param {number} index - Position at which the overwrite starts.
 * @param {string} replacement - Text written over the original characters.
 * @returns {string} The resulting string; the receiver is left untouched.
 */
String.prototype.replaceAt = function (index, replacement) {
  const tailStart = index + replacement.length;
  const head = this.substring(0, index);
  const tail = this.slice(tailStart);
  return head + replacement + tail;
};

// Test table: each entry is [input with straight quotes, expected output with
// typographic quotes]. Declared with const so it is not an implicit global.
const tests = [
// ['I'm "happy"! J'ai enfin "osé". La rencontre de mon "âme-sœur" a "été" au rendez-vous…
// and how it should look after correction:', 'I'm "happy"! J'ai enfin "osé". La rencontre de mon "âme-sœur" a "été" au rendez-vous…
// and how it should look after correction:'],
  ['tarun" lalwani"', 'tarun” lalwani”'],
  ['tarun lalwani"', 'tarun lalwani”'],
  ['"tarun lalwani', '“tarun lalwani'],
  ['"tarun" lalwani', '“tarun” lalwani'],
  ['"tarun" lalwani"', '“tarun” lalwani”'],
  ['"tarun lalwani"', '“tarun lalwani”'],
];

/**
 * Reports whether `value` contains the three-character sequence: double quote,
 * comma, space.
 *
 * NOTE(review): the quote-balancing loop passes single characters, which can
 * never contain that sequence, so those calls return false. A character class
 * may have been intended — confirm against the loop before changing the
 * pattern, because its branches currently depend on this result.
 *
 * @param {*} value - Text to inspect; it is converted to a string by the regex test.
 * @returns {boolean} true when the sequence occurs in `value`.
 */
function isCharacterSeparator(value) {
  const separatorSequence = /", /;
  const containsSequence = separatorSequence.test(value);
  return containsSequence;
}

// Runs every [input, expected] pair from `tests` through the quote-balancing
// pass, logs the converted text, and logs a diagnostic line when the result
// differs from the expectation.
for (const [data, output] of tests) {
  const qt = '\u201C\u201D'; // “ and ”, indexed by the balance counter
  const qtL = '\u201C'; // opening quote “
  const qtR = '\u201D'; // closing quote ”
  // 0 until a quote is found at index 0, then 1; it is never reset afterwards.
  let bal = 0;
  const pattern = /["\u201C\u201D]/g;
  let data_new = data;
  let match;
  // Positions come from the untouched `data`; replacements go into `data_new`.
  // Every replacement is one character long, so indices stay aligned.
  while ((match = pattern.exec(data)) !== null) {
    if (bal === 0) {
      if (match.index === 0) {
        data_new = data_new.replaceAt(match.index, qt[bal]);
        bal = 1;
      } else {
        if (isCharacterSeparator(data_new[match.index - 1])) {
          data_new = data_new.replaceAt(match.index, qtL);
        } else {
          data_new = data_new.replaceAt(match.index, qtR);
        }
      }
    } else {
      if (match.index === data.length - 1) {
        // A quote in the very last position always closes.
        data_new = data_new.replaceAt(match.index, qtR);
      } else if (isCharacterSeparator(data_new[match.index - 1])) {
        if (isCharacterSeparator(data_new[match.index + 1])) {
          // previous is separator as well as next one too
          // "tarun " lalwani"
          // take a call what needs to be done here?
          // (deliberately left unchanged: the quote stays as it is)
        } else {
          data_new = data_new.replaceAt(match.index, qtL);
        }
      } else {
        if (isCharacterSeparator(data_new[match.index + 1])) {
          data_new = data_new.replaceAt(match.index, qtL);
        } else {
          data_new = data_new.replaceAt(match.index, qtR);
        }
      }
    }
  }

  console.log(data_new);
  if (data_new !== output) {
    console.log(`Failed to parse '${data}' Actual='${data_new}' Expected='${output}'`);
  }
}