Skip to content

Commit 8fbd093

Browse files
committed
Optimize webhooks JSON with compact structure and string interning
1 parent 4429c41 commit 8fbd093

File tree

13 files changed

+34287
-49189
lines changed

13 files changed

+34287
-49189
lines changed

.github/workflows/buildtest.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,31 @@ jobs:
6464
echo ""
6565
exit 1
6666
fi
67+
68+
validate-webhooks:
69+
runs-on: ubuntu-latest
70+
name: Validate webhook optimization
71+
72+
steps:
73+
- uses: actions/checkout@v4
74+
- name: Use Node.js 22.x
75+
uses: actions/setup-node@v4
76+
with:
77+
node-version: 22.x
78+
cache: 'npm'
79+
registry-url: 'https://npm.pkg.github.com'
80+
- run: npm ci
81+
env:
82+
NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
83+
- name: Generate full webhooks file
84+
run: cd languageservice && npm run update-webhooks
85+
- name: Run optimization validation tests
86+
run: cd languageservice && npm test -- --testPathPattern=eventPayloads
87+
- name: Verify validation tests ran
88+
run: |
89+
if [ ! -f languageservice/src/context-providers/events/webhooks.full.validation-complete ]; then
90+
echo "ERROR: Validation tests did not run!"
91+
echo "The webhooks.full.validation-complete marker file was not created."
92+
exit 1
93+
fi
94+
echo "Validation tests completed at: $(cat languageservice/src/context-providers/events/webhooks.full.validation-complete)"

.gitignore

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ node_modules
77
# Minified JSON (generated at build time)
88
*.min.json
99

10-
# Intermediate JSON for size comparison (generated by update-webhooks --all)
11-
*.all.json
12-
*.drop.json
13-
*.strip.json
10+
# Full webhooks source (generated by update-webhooks, used for validation tests)
11+
*.full.json
12+
13+
# Validation marker (generated by tests)
14+
*.validation-complete

docs/json-data-files.md

Lines changed: 87 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ This document describes the JSON data files used by the language service package
66

77
The language service uses several JSON files containing schema definitions, webhook payloads, and other metadata. To reduce bundle size, these files are:
88

9-
1. **Optimized at generation time** — unused events are dropped, unused fields are stripped
10-
2. **Minified at build time** — whitespace is removed to produce `.min.json` files
9+
1. **Optimized at generation time** — unused events are dropped, unused fields are stripped, shared objects are deduplicated, property names are interned
10+
2. **Compacted using a space-efficient format** — params are stored as compact arrays whose layout is determined by the types of their elements, instead of verbose objects
11+
3. **Minified at build time** — whitespace is removed to produce `.min.json` files
1112

1213
The source `.json` files are human-readable and checked into the repository. The `.min.json` files are generated during build and gitignored.
1314

@@ -19,6 +20,7 @@ The source `.json` files are human-readable and checked into the repository. The
1920
|------|-------------|
2021
| `src/context-providers/events/webhooks.json` | Webhook event payload schemas for autocompletion |
2122
| `src/context-providers/events/objects.json` | Deduplicated shared object definitions referenced by webhooks |
23+
| `src/context-providers/events/strings.json` | Interned property names shared by webhooks and objects |
2224
| `src/context-providers/events/schedule.json` | Schedule event context data |
2325
| `src/context-providers/events/workflow_call.json` | Reusable workflow call context data |
2426
| `src/context-providers/descriptions.json` | Context variable descriptions for hover |
@@ -33,7 +35,7 @@ The source `.json` files are human-readable and checked into the repository. The
3335

3436
### Webhooks and Objects
3537

36-
The `webhooks.json` and `objects.json` files are generated from the [GitHub REST API description](https://github.com/github/rest-api-description):
38+
The `webhooks.json`, `objects.json`, and `strings.json` files are generated from the [GitHub REST API description](https://github.com/github/rest-api-description):
3739

3840
```bash
3941
cd languageservice
@@ -44,9 +46,10 @@ This script:
4446
1. Fetches webhook schemas from the GitHub API description
4547
2. **Validates** all events are categorized (fails if new events are found)
4648
3. **Drops** events that aren't valid workflow triggers (see [Dropped Events](#dropped-events))
47-
4. **Strips** unused fields like `description` and `summary` (see [Stripped Fields](#stripped-fields))
49+
4. **Compacts** params into a space-efficient array format, keeping only `name`, `description`, and `childParamsGroups` (see [Compact Format](#compact-format))
4850
5. **Deduplicates** shared object definitions into `objects.json`
49-
6. Writes the optimized, pretty-printed JSON files
51+
6. **Interns** duplicate property names into `strings.json` (see [String Interning](#string-interning))
52+
7. Writes the optimized, pretty-printed JSON files
5053

5154
### Handling New Webhook Events
5255

@@ -101,13 +104,15 @@ The code imports the minified versions:
101104

102105
```ts
103106
import webhooks from "./events/webhooks.min.json"
107+
import objects from "./events/objects.min.json"
108+
import strings from "./events/strings.min.json"
104109
```
105110

106111
## CI Verification
107112

108113
CI verifies that generated source files are up-to-date:
109114

110-
1. Runs `npm run update-webhooks` to regenerate webhooks.json and objects.json
115+
1. Runs `npm run update-webhooks` to regenerate webhooks.json, objects.json, and strings.json
111116
2. Checks for uncommitted changes with `git diff --exit-code`
112117

113118
The `.min.json` files are generated at build time and are not committed to the repository.
@@ -120,31 +125,93 @@ Webhook events that aren't valid workflow `on:` triggers are dropped (e.g., `ins
120125

121126
See `DROPPED_EVENTS` in `script/webhooks/index.ts` for the full list.
122127

123-
## Stripped Fields
128+
## Compact Format
124129

125-
Unused fields are stripped to reduce bundle size. For example:
130+
Params are converted from verbose objects into compact arrays, keeping only the fields needed for autocompletion and hover docs (`name`, `description`, `childParamsGroups`). Unused fields like `type`, `in`, `isRequired`, `enum`, and `default` are discarded.
131+
132+
| Format | Meaning |
133+
|--------|---------|
134+
| `"name"` | Name only (no description, no children) |
135+
| `[name, desc]` | Name + description (arr[1] is a string) |
136+
| `[name, children]` | Name + children (arr[1] is an array) |
137+
| `[name, desc, children]` | Name + description + children |
138+
139+
The reader inspects `arr[1]` to determine the format: if `typeof arr[1] === "string"`, it's a description; if `Array.isArray(arr[1])`, it's children (note that `typeof` alone reports `"object"` for arrays).
140+
141+
**Example:**
126142

127143
```json
128-
// Before (from webhooks.all.json)
144+
// Before (object format)
129145
{
130-
"type": "object",
131146
"name": "issue",
132-
"in": "body",
133147
"description": "The issue itself.",
134-
"isRequired": true,
135-
"childParamsGroups": [...]
148+
"childParamsGroups": [
149+
{ "name": "id" },
150+
{ "name": "title", "description": "Issue title" }
151+
]
136152
}
137153

138-
// After (webhooks.json)
154+
// After (compact format)
155+
["issue", "The issue itself.", [
156+
"id",
157+
["title", "Issue title"]
158+
]]
159+
```
160+
161+
## String Interning
162+
163+
Property names that appear 2+ times are "interned" into a shared string table (`strings.json`). In the compact arrays, these names are replaced with numeric indices:
164+
165+
```json
166+
// strings.json
167+
["url", "id", "name", ...] // Index 0 = "url", 1 = "id", 2 = "name"
168+
169+
// webhooks.json - uses indices instead of strings
170+
["push", [
171+
[0, "The URL..."], // 0 = "url" from string table
172+
[1, "Unique ID"], // 1 = "id"
173+
2 // 2 = "name" (name-only, no description)
174+
]]
175+
```
176+
177+
**How to distinguish indices from other values:**
178+
179+
- **Top-level numbers in `p` arrays** → Object indices (references into `objects.json`)
180+
- **Nested numbers inside compact arrays** → String indices (references into `strings.json`)
181+
- **Literal strings** → Singletons (names appearing only once, not interned)
182+
183+
Singletons are kept as literal strings for readability and to avoid the overhead of adding rarely-used names to the string table.
184+
185+
## Deduplication
186+
187+
Shared object definitions are extracted into `objects.json` and referenced by index:
188+
189+
```json
190+
// objects.json
191+
[
192+
["url", "The URL"], // Index 0
193+
["id", "Unique identifier"], // Index 1
194+
[...]
195+
]
196+
197+
// webhooks.json - top-level numbers reference objects
139198
{
140-
"name": "issue",
141-
"description": "The issue itself.",
142-
"childParamsGroups": [...]
199+
"push": {
200+
"default": {
201+
"p": [0, 1, ["ref", "The git ref"]] // 0 and 1 are object indices
202+
}
203+
}
143204
}
144205
```
145206

146-
Only `name`, `description`, and `childParamsGroups` are kept — these are used for autocompletion and hover docs.
207+
This reduces duplication when the same object structure appears in multiple events (e.g., `repository`, `sender`, `organization`).
208+
209+
## Size Reduction
147210

148-
To compare all fields vs stripped, run `npm run update-webhooks -- --all` and diff the `.all.json` files against the regular ones.
211+
The optimizations achieve approximately 97% reduction in minified file size (about 93% after gzip):
149212

150-
See `EVENT_ACTION_FIELDS` and `BODY_PARAM_FIELDS` in `script/webhooks/index.ts` to modify what gets stripped.
213+
| Stage | Minified | Gzip |
214+
|-------|----------|------|
215+
| Original (webhooks.full.json) | 6.7 MB | 310 KB |
216+
| After optimization | 209 KB | 22 KB |
217+
| **Reduction** | **97%** | **93%** |

languageservice/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@
3737
"format-check": "prettier --check '**/*.ts'",
3838
"lint": "eslint 'src/**/*.ts'",
3939
"lint-fix": "eslint --fix 'src/**/*.ts'",
40-
"minify-json": "node ../script/minify-json.js src/context-providers/descriptions.json src/context-providers/events/webhooks.json src/context-providers/events/objects.json src/context-providers/events/schedule.json src/context-providers/events/workflow_call.json",
40+
"minify-json": "node ../script/minify-json.js src/context-providers/descriptions.json src/context-providers/events/webhooks.json src/context-providers/events/objects.json src/context-providers/events/strings.json src/context-providers/events/schedule.json src/context-providers/events/workflow_call.json",
4141
"prebuild": "npm run minify-json",
4242
"prepublishOnly": "npm run build && npm run test",
4343
"pretest": "npm run minify-json",
4444
"test": "NODE_OPTIONS=\"--experimental-vm-modules\" jest",
4545
"test-watch": "NODE_OPTIONS=\"--experimental-vm-modules\" jest --watch",
46-
"update-webhooks": "npx tsx script/webhooks/index.ts",
46+
"update-webhooks": "npx tsx script/webhooks/update-webhooks.ts",
4747
"watch": "tsc --build tsconfig.build.json --watch"
4848
},
4949
"dependencies": {

languageservice/script/webhooks/deduplicate.ts

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,38 @@
11
import Webhook from "./webhook";
22

3+
/**
4+
* Get the name from a param.
5+
* Formats: "name" (string), [name, ...] (array), or { name, ... } (object)
6+
*/
7+
function getParamName(param: any): string {
8+
if (typeof param === "string") {
9+
return param;
10+
}
11+
if (Array.isArray(param)) {
12+
return param[0];
13+
}
14+
return param.name;
15+
}
16+
17+
/**
18+
* Get params from a webhook action.
19+
* Uses 'p' (short key) if present, falls back to 'bodyParameters', then to an empty array
20+
*/
21+
function getParams(webhook: any): any[] {
22+
return webhook.p || webhook.bodyParameters || [];
23+
}
24+
25+
/**
26+
* Set params on a webhook action using the short key 'p'
27+
*/
28+
function setParams(webhook: any, params: any[]): void {
29+
if (webhook.p !== undefined) {
30+
webhook.p = params;
31+
} else {
32+
webhook.bodyParameters = params;
33+
}
34+
}
35+
336
// Store any repeated body parameters in an array
437
// and replace them in the webhook with an index in the array
538
export function deduplicateWebhooks(webhooks: Record<string, Record<string, Webhook>>): any[] {
@@ -10,10 +43,11 @@ export function deduplicateWebhooks(webhooks: Record<string, Record<string, Webh
1043
const objectCount: Record<string, number> = {};
1144

1245
for (const webhook of iterateWebhooks(webhooks)) {
13-
for (const param of webhook.bodyParameters) {
14-
objectsByName[param.name] ||= [];
15-
const index = findOrAdd(param, objectsByName[param.name]);
16-
const key = `${param.name}:${index}`;
46+
for (const param of getParams(webhook)) {
47+
const name = getParamName(param);
48+
objectsByName[name] ||= [];
49+
const index = findOrAdd(param, objectsByName[name]);
50+
const key = `${name}:${index}`;
1751
objectCount[key] ||= 0;
1852
objectCount[key]++;
1953
}
@@ -27,18 +61,19 @@ export function deduplicateWebhooks(webhooks: Record<string, Record<string, Webh
2761

2862
for (const webhook of iterateWebhooks(webhooks)) {
2963
const newParams: any[] = [];
30-
for (const param of webhook.bodyParameters) {
31-
const index = find(param, objectsByName[param.name]);
32-
const key = `${param.name}:${index}`;
64+
for (const param of getParams(webhook)) {
65+
const name = getParamName(param);
66+
const index = find(param, objectsByName[name]);
67+
const key = `${name}:${index}`;
3368
if (objectCount[key] > 1) {
34-
newParams.push(indexForParam(param, index, bodyParamIndexMap, duplicatedBodyParams));
69+
newParams.push(indexForParam(param, name, index, bodyParamIndexMap, duplicatedBodyParams));
3570
} else {
3671
// If an object is only used once, keep it inline
3772
newParams.push(param);
3873
}
3974
}
4075

41-
webhook.bodyParameters = newParams;
76+
setParams(webhook, newParams);
4277
}
4378

4479
return duplicatedBodyParams;
@@ -74,11 +109,12 @@ function find(param: any, objects: any[]): number {
74109

75110
function indexForParam(
76111
param: any,
112+
paramName: string,
77113
paramNameIndex: number,
78114
objectIndexMap: Record<string, number>,
79115
duplicatedBodyParams: any[]
80116
): number {
81-
const key = `${param.name}:${paramNameIndex}`;
117+
const key = `${paramName}:${paramNameIndex}`;
82118

83119
const existingIndex = objectIndexMap[key];
84120
if (existingIndex !== undefined) {

0 commit comments

Comments
 (0)