Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions experiments/test_hyphenated_words.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/**
* Test hyphenated words are preserved in parsing
*/
import { Parser } from '../js/src/Parser.js';
import { formatLinks } from '../js/src/Link.js';

// Single parser instance, constructed with no options, shared by every case below.
const parser = new Parser();

// Inputs containing hyphens, grouped by the kind of text they resemble.
const testCases = [
  // Hyphenated names
  'Jean-Luc Picard',
  'conan-center-index',
  'a-b-c-d',

  // Math between digits (should tokenize)
  '1-2',
  '10-20',
  'a1-b2', // Mixed - should not tokenize because each side of the hyphen contains a letter

  // Variable-like names
  'my-var-name',
  'test-case-1',
];

console.log('=== Hyphenated Word Tests ===\n');

// For each input, print the ids of the first link's values and the formatted output.
for (const input of testCases) {
  // Report a failing input and keep going, so that one error does not hide
  // the results of the remaining cases.
  try {
    const links = parser.parse(input);
    const formatted = formatLinks(links);
    console.log(`Input: "${input}"`);
    // Falls back to 'none' when there is no first link, no values, or the joined ids are empty.
    console.log(`Values: ${links[0]?.values?.map((v) => v.id).join(' | ') || 'none'}`);
    console.log(`Formatted: "${formatted}"`);
    console.log('---');
  } catch (e) {
    console.log(`Input: "${input}"`);
    console.log(`Error: ${e.message}`);
    console.log('---');
  }
}
53 changes: 53 additions & 0 deletions experiments/test_punctuation_current.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/**
* Experiment to understand current behavior with punctuation and math symbols
*/
import { Parser } from '../js/src/Parser.js';
import { formatLinks } from '../js/src/Link.js';

// One parser, built with no options, is reused for every input.
const parser = new Parser();

// Test cases from the issue, split by category and then concatenated in order.
const punctuationCases = ['1, 2 and 3', '1,2,3', '1. 2. 3.', '1.2.3', 'hello, world'];

const mathSymbolCases = [
  '1+1',
  '1 + 1',
  '1+1,1/1,1*1',
  '1 + 1 , 1 / 1 , 1 * 1',
  'x+y=z',
  'a-b',
];

const otherPunctuationCases = ['hello;world', 'hello!world', 'hello?world'];

// Inputs that are already wrapped in double quotes.
const quotedCases = ['"1,"', '"1."', '"1,2,3"'];

const testCases = [
  ...punctuationCases,
  ...mathSymbolCases,
  ...otherPunctuationCases,
  ...quotedCases,
];

console.log('=== Current Parsing Behavior ===\n');

// Parse and format every input, dumping the raw parse result alongside the
// formatted text; a thrown error is reported for that input and the run continues.
testCases.forEach((source) => {
  try {
    const parsed = parser.parse(source);
    const rendered = formatLinks(parsed);

    console.log(`Input: "${source}"`);
    console.log(`Parsed: ${JSON.stringify(parsed, null, 2)}`);
    console.log(`Formatted: "${rendered}"`);

    // Ids of the first link's values; 'none' when absent or when the joined text is empty.
    const joinedIds = parsed[0]?.values?.map((value) => value.id).join(' | ');
    console.log(`Values: ${joinedIds || 'none'}`);
    console.log('---');
  } catch (error) {
    console.log(`Input: "${source}"`);
    console.log(`Error: ${error.message}`);
    console.log('---');
  }
});
77 changes: 77 additions & 0 deletions experiments/test_punctuation_new.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/**
* Experiment to test the new punctuation and math symbol tokenization behavior
*/
import { Parser } from '../js/src/Parser.js';
import { formatLinks } from '../js/src/Link.js';
import { FormatOptions } from '../js/src/FormatOptions.js';

// Create parsers with different settings
const parserWithTokenization = new Parser({ tokenizeSymbols: true });
const parserWithoutTokenization = new Parser({ tokenizeSymbols: false });

// Options for the "Compact" output line. Built once: they are identical for every case.
const compactOptions = new FormatOptions({ compactSymbols: true });

/**
 * Summarizes the values of the first parsed link.
 *
 * @param {Array} links - Result of `parse()`; only `links[0]` is inspected.
 * @returns {string} The value ids joined with ' | ', or 'none' when there is no
 *   first link, it has no values, or the joined text is empty.
 */
function describeValues(links) {
  return links[0]?.values?.map((v) => v.id).join(' | ') || 'none';
}

/**
 * Parses one input and prints the input, its value ids and its formatted form.
 * Any error thrown while parsing or formatting is printed for that input
 * instead of propagating, so the remaining cases still run.
 *
 * @param {{ parse: Function }} activeParser - Parser used for this case.
 * @param {string} input - Text to parse.
 * @param {boolean} [showCompact=false] - Also print the output formatted with `compactOptions`.
 */
function reportCase(activeParser, input, showCompact = false) {
  try {
    const links = activeParser.parse(input);
    const formatted = formatLinks(links);
    // Format everything before logging so a failure prints only the error report.
    const compactFormatted = showCompact ? formatLinks(links, compactOptions) : undefined;

    console.log(`Input: "${input}"`);
    console.log(`Values: ${describeValues(links)}`);
    console.log(`Formatted: "${formatted}"`);
    if (showCompact) {
      console.log(`Compact: "${compactFormatted}"`);
    }
    console.log('---');
  } catch (e) {
    console.log(`Input: "${input}"`);
    console.log(`Error: ${e.message}`);
    console.log('---');
  }
}

// Test cases from the issue
const testCases = [
  // From issue description
  '1, 2 and 3',
  '1,2,3',
  '1+1,1/1,1*1',

  // Additional punctuation tests
  'hello, world',
  '1. 2. 3.',
  '1.2.3',

  // Math tests
  '1+1',
  '1 + 1',
  'x+y=z',
  'a-b',

  // Quoted strings should preserve punctuation
  '"1,"',
  '"1."',
  '"1,2,3"',
  '"hello, world"',

  // Mixed
  'test "1,2,3" more',
];

console.log('=== New Parsing Behavior (with tokenization) ===\n');

for (const input of testCases) {
  reportCase(parserWithTokenization, input, true);
}

console.log('\n=== Without Tokenization (backwards compatible) ===\n');

// A small subset is enough to compare against the tokenizing parser.
for (const input of ['1,2,3', '1+1', 'hello, world']) {
  reportCase(parserWithoutTokenization, input);
}
Loading