From 58cb5eadec1481c57f3656ed81800e4e9cce9f63 Mon Sep 17 00:00:00 2001 From: Garrett Luu Date: Wed, 22 Jul 2020 15:47:06 -0700 Subject: [PATCH 1/6] Added support for lists WIP --- src/codecs/ODT/index.ts | 68 +++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/src/codecs/ODT/index.ts b/src/codecs/ODT/index.ts index a765500..a4097f0 100644 --- a/src/codecs/ODT/index.ts +++ b/src/codecs/ODT/index.ts @@ -8,9 +8,21 @@ function process_para(child: Node, root: WJSPara) { case 1 /*ELEMENT_NODE*/: const element = (child as Element); if(element.tagName.match(/draw:/)) return; - if(element.tagName == "text:s") root.elts.push({t:"s", v: " ".repeat(+element.getAttribute("c") || 1)}); - if(element.tagName == "text:tab") root.elts.push({t:"s", v: "\t".repeat(+element.getAttribute("c") || 1)}); - element.childNodes.forEach((child) => process_para(child, root)); + switch(element.tagName) { + case "text:s": + root.elts.push({ t: "s", v: " ".repeat(+element.getAttribute("c") || 1) }); + break; + case "text:tab": + root.elts.push({ t: "s", v: "\t".repeat(+element.getAttribute("c") || 1) }); + break; + case "text:note": + return; + default: + element.childNodes.forEach((child) => { + process_para(child, root); + }); + break; + } break; case 3 /*TEXT_NODE*/: root.elts.push({t:"s", v: child.textContent}); break; default: throw "unsupported node type " + child.nodeType; @@ -97,9 +109,45 @@ function process_index_body(child: Node, root: WJSPara[]) { } } +/* children */ +function process_list(child: Node, root: WJSPara) { + switch(child.nodeType) { + case 1 /*ELEMENT_NODE*/: + const element = (child as Element); + switch(element.tagName) { + // Recursive case + case "text:list-item": + element.childNodes.forEach((child) => process_list_item(child, root)); + break; + } + } +} + +/* children */ +function process_list_item(child: Node, root: WJSPara) { + switch(child.nodeType) { + case 1: + const element = (child as Element); + switch(element.tagName) { + case "text:list": + element.childNodes.forEach((child) => process_list(child, root)); + break; + case "text:h": + case "text:p": + element.childNodes.forEach((child) => process_para(child, root)); + break; + } + } +} + +function process_note(child: Node, root: WJSPara) { + +} + /* 3.4 children */ function process_body_elt(child: ChildNode, root: boolean = false): WJSPara[]|WJSPara|void { const para: WJSPara = { elts: []}; + const paraArray: WJSPara[] = []; switch(child.nodeType) { case 1 /*ELEMENT_NODE*/: const element = (child as Element); @@ -118,16 +166,13 @@ function process_body_elt(child: ChildNode, root: boolean = false): WJSPara[]|WJ return para; // text:illustration-index case "text:illustration-index": - const paraArray: WJSPara[] = []; - console.log() element.childNodes.forEach((child) => process_illustration_index(child, paraArray)); return paraArray; - // case "text:illustration-index-body": - // recursively parse children - // para.elts.push(process_illustration_index(element, para)); - // return para; // text:list - case "text:illustration-index-source": + case "text:list": + element.childNodes.forEach((child) => process_list(child, para)); + return paraArray; + case "text:note": case "text:sequence-decls": case "text:variable-decls": case "text:tracked-changes": @@ -167,9 +212,12 @@ export function parse_cfb(file: CFB$Container): WJSDoc { textelt.childNodes.forEach(child => { const res = process_body_elt(child, true); if(res) { + // Check if res is an array if ((res as WJSPara[]).push == undefined) { + // If not, push to doc doc.p.push(res as WJSPara); } else { + // If yes, push each paragraph to doc (res as WJSPara[]).forEach(para => { doc.p.push(para); }); From 472046810fc08780cf800d5803cbdc3dd74864c4 Mon Sep 17 00:00:00 2001 From: Garrett Luu Date: Wed, 22 Jul 2020 16:40:31 -0700 Subject: [PATCH 2/6] Added support for iteration in ordered lists --- src/codecs/ODT/index.ts | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/codecs/ODT/index.ts b/src/codecs/ODT/index.ts index a4097f0..3da51c6 100644 --- a/src/codecs/ODT/index.ts +++ b/src/codecs/ODT/index.ts @@ -110,38 +110,51 @@ function process_index_body(child: Node, root: WJSPara[]) { } /* children */ -function process_list(child: Node, root: WJSPara) { +function process_list(child: Node, root: WJSPara[], index: number) { switch(child.nodeType) { case 1 /*ELEMENT_NODE*/: const element = (child as Element); switch(element.tagName) { // Recursive case case "text:list-item": - element.childNodes.forEach((child) => process_list_item(child, root)); + element.childNodes.forEach((child) => { + process_list_item(child, root, index); + index++; + + }); break; } } } /* children */ -function process_list_item(child: Node, root: WJSPara) { +function process_list_item(child: Node, root: WJSPara[], iter: number) { switch(child.nodeType) { case 1: const element = (child as Element); switch(element.tagName) { case "text:list": - element.childNodes.forEach((child) => process_list(child, root)); + let index : number = 1; + element.childNodes.forEach((child) => { + process_list(child, root, index); + index++; + }); break; case "text:h": case "text:p": - element.childNodes.forEach((child) => process_para(child, root)); + const para : WJSPara = { elts: [] }; + para.elts.push({t: "s", v: "" + iter + ". "}); + element.childNodes.forEach((child) => { + process_para(child, para); + }); + root.push(para); break; } } } function process_note(child: Node, root: WJSPara) { - + } /* 3.4 children */ @@ -170,7 +183,11 @@ function process_body_elt(child: ChildNode, root: boolean = false): WJSPara[]|WJ return paraArray; // text:list case "text:list": - element.childNodes.forEach((child) => process_list(child, para)); + let index : number = 1; + element.childNodes.forEach((child) => { + process_list(child, paraArray, index); + index++; + }); return paraArray; case "text:note": case "text:sequence-decls": From 2557cf112e743d345589f494a163577fafe561fa Mon Sep 17 00:00:00 2001 From: Garrett Luu Date: Wed, 22 Jul 2020 20:56:43 -0700 Subject: [PATCH 3/6] Implemented endnote processing WIP --- src/codecs/ODT/index.ts | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/codecs/ODT/index.ts b/src/codecs/ODT/index.ts index 3da51c6..e028d06 100644 --- a/src/codecs/ODT/index.ts +++ b/src/codecs/ODT/index.ts @@ -16,6 +16,9 @@ function process_para(child: Node, root: WJSPara) { root.elts.push({ t: "s", v: "\t".repeat(+element.getAttribute("c") || 1) }); break; case "text:note": + element.childNodes.forEach((child) => { + process_note(child, root); + }); return; default: element.childNodes.forEach((child) => { @@ -120,7 +123,6 @@ function process_list(child: Node, root: WJSPara[], index: number) { element.childNodes.forEach((child) => { process_list_item(child, root, index); index++; - }); break; } @@ -154,7 +156,23 @@ function process_list_item(child: Node, root: WJSPara[], iter: number) { } function process_note(child: Node, root: WJSPara) { - + switch(child.nodeType) { + case 1 /*ELEMENT_NODE*/: + const element = (child as Element); + switch(element.tagName) { + case "text:note-citation": + element.childNodes.forEach((child) => process_para(child, root)); + break; + case "text:note-body": + element.childNodes.forEach((child) => process_note(child, root)); + break; + case "text:p": + case "text:h": + element.childNodes.forEach((child) => process_para(child, root)); + break; + } + break; + } } /* 3.4 children */ From a2c16f113d97d586860d0fbb6d5c8598e48e0ad0 Mon Sep 17 00:00:00 2001 From: Garrett Luu Date: Thu, 23 Jul 2020 15:05:08 -0700 Subject: [PATCH 4/6] Implemented endnote parsing --- src/codecs/ODT/index.ts | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/codecs/ODT/index.ts b/src/codecs/ODT/index.ts index e028d06..186ceba 100644 --- a/src/codecs/ODT/index.ts +++ b/src/codecs/ODT/index.ts @@ -1,6 +1,6 @@ import { CFB$Container, find } from "cfb"; import { JSDOM } from "jsdom"; -import { WJSDoc, WJSPara, WJSTable, WJSTableRow, WJSTableCell, WJSParaElement } from "../../types"; +import { WJSDoc, WJSPara, WJSTable, WJSTableRow, WJSTableCell, WJSParaElement, WJSEndNote } from "../../types"; /* 5.1.3 children */ function process_para(child: Node, root: WJSPara) { @@ -9,16 +9,20 @@ function process_para(child: Node, root: WJSPara) { const element = (child as Element); if(element.tagName.match(/draw:/)) return; switch(element.tagName) { + // Parse tabs and spaces case "text:s": root.elts.push({ t: "s", v: " ".repeat(+element.getAttribute("c") || 1) }); break; case "text:tab": root.elts.push({ t: "s", v: "\t".repeat(+element.getAttribute("c") || 1) }); break; + // Special case for endnotes case "text:note": + let body : WJSPara[] = []; element.childNodes.forEach((child) => { - process_note(child, root); + process_note(child, root, body); }); + root.elts.push({ t: "endnote", id: +element.getAttribute("id"), p: body}); return; default: element.childNodes.forEach((child) => { @@ -155,7 +159,8 @@ function process_list_item(child: Node, root: WJSPara[], iter: number) { } } -function process_note(child: Node, root: WJSPara) { +function process_note(child: Node, root: WJSPara, body: WJSPara[]) { + const para : WJSPara = { elts: [] }; switch(child.nodeType) { case 1 /*ELEMENT_NODE*/: const element = (child as Element); @@ -164,11 +169,11 @@ function process_note(child: Node, root: WJSPara) { element.childNodes.forEach((child) => process_para(child, root)); break; case "text:note-body": - element.childNodes.forEach((child) => process_note(child, root)); + element.childNodes.forEach((child) => process_note(child, root, body)); break; case "text:p": case "text:h": - element.childNodes.forEach((child) => process_para(child, root)); + element.childNodes.forEach((child) => process_para(child, para)); break; } break; From fac738b776def080c6c2a51f4ed23475e1667e5a Mon Sep 17 00:00:00 2001 From: Garrett Luu Date: Thu, 23 Jul 2020 15:29:15 -0700 Subject: [PATCH 5/6] Merged process_list functions --- src/codecs/ODT/index.ts | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/codecs/ODT/index.ts b/src/codecs/ODT/index.ts index 186ceba..0b69a66 100644 --- a/src/codecs/ODT/index.ts +++ b/src/codecs/ODT/index.ts @@ -86,7 +86,9 @@ function process_illustration_index(child: Node, root: WJSPara[]) { case 1 /*ELEMENT_NODE*/: const element = (child as Element); switch(element.tagName) { - case "text:illustration-index-source": break; + // Skip over this + case "text:illustration-index-source": + return; case "text:index-body": element.childNodes.forEach((child) => process_index_body(child, root)); break; @@ -125,31 +127,21 @@ function process_list(child: Node, root: WJSPara[], index: number) { // Recursive case case "text:list-item": element.childNodes.forEach((child) => { - process_list_item(child, root, index); + process_list(child, root, index); index++; }); break; - } - } -} - -/* children */ -function process_list_item(child: Node, root: WJSPara[], iter: number) { - switch(child.nodeType) { - case 1: - const element = (child as Element); - switch(element.tagName) { case "text:list": - let index : number = 1; + let i : number = 1; element.childNodes.forEach((child) => { - process_list(child, root, index); - index++; + process_list(child, root, i); + i++; }); break; case "text:h": case "text:p": const para : WJSPara = { elts: [] }; - para.elts.push({t: "s", v: "" + iter + ". "}); + para.elts.push({t: "s", v: "" + index + ". "}); element.childNodes.forEach((child) => { process_para(child, para); }); From 49b281c4dbdd7132e80ad28494031e4f4a95f809 Mon Sep 17 00:00:00 2001 From: Garrett Luu Date: Thu, 23 Jul 2020 21:23:41 -0700 Subject: [PATCH 6/6] Fixed empty p array in endnote node --- src/codecs/ODT/index.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/codecs/ODT/index.ts b/src/codecs/ODT/index.ts index 0b69a66..51c9fcf 100644 --- a/src/codecs/ODT/index.ts +++ b/src/codecs/ODT/index.ts @@ -165,7 +165,10 @@ function process_note(child: Node, root: WJSPara, body: WJSPara[]) { break; case "text:p": case "text:h": - element.childNodes.forEach((child) => process_para(child, para)); + element.childNodes.forEach((child) => { + process_para(child, para); + body.push(para); + }); break; } break;