Skip to content

Commit 0b3f2e0

Browse files
committed
Cache patches instead of HTML
1 parent 2913b54 commit 0b3f2e0

File tree

3 files changed

+160
-33
lines changed

3 files changed

+160
-33
lines changed

lib/diff/storage/gcs.ex

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,11 @@ defmodule Diff.Storage.GCS do
6262
end
6363
end
6464

65-
def put_patch(package, from_version, to_version, patch_id, patch_html) do
65+
def put_patch(package, from_version, to_version, patch_id, patch_data) do
6666
with {:ok, hash} <- combined_checksum(package, from_version, to_version),
6767
url = url(patch_key(package, from_version, to_version, hash, patch_id)),
6868
{:ok, 200, _headers, _body} <-
69-
Diff.HTTP.retry("gs", fn -> Diff.HTTP.put(url, headers(), patch_html) end) do
69+
Diff.HTTP.retry("gs", fn -> Diff.HTTP.put(url, headers(), patch_data) end) do
7070
:ok
7171
else
7272
{:ok, status, _headers, _body} ->
@@ -154,7 +154,7 @@ defmodule Diff.Storage.GCS do
154154
end
155155

156156
defp patch_key(package, from_version, to_version, hash, patch_id) do
157-
"diffs/patches/#{package}-#{from_version}-#{to_version}-#{hash}-#{patch_id}.html"
157+
"diffs/patches/#{package}-#{from_version}-#{to_version}-#{hash}-#{patch_id}.patch"
158158
end
159159

160160
defp metadata_key(package, from_version, to_version, hash) do

lib/diff/storage/local.ex

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,12 @@ defmodule Diff.Storage.Local do
5151
end
5252
end
5353

54-
def put_patch(package, from_version, to_version, patch_id, patch_html) do
54+
def put_patch(package, from_version, to_version, patch_id, patch_data) do
5555
with {:ok, hash} <- combined_checksum(package, from_version, to_version),
5656
filename = patch_key(package, from_version, to_version, hash, patch_id),
5757
path = Path.join([dir(), package, "patches", filename]),
5858
:ok <- File.mkdir_p(Path.dirname(path)) do
59-
File.write!(path, patch_html)
59+
File.write!(path, patch_data)
6060
:ok
6161
else
6262
{:error, reason} ->
@@ -79,7 +79,7 @@ defmodule Diff.Storage.Local do
7979
|> Enum.map(fn filename ->
8080
filename
8181
|> String.replace_prefix(prefix, "")
82-
|> String.replace_suffix(".html", "")
82+
|> String.replace_suffix(".patch", "")
8383
end)
8484
|> Enum.sort_by(fn patch_id ->
8585
# Extract numeric part for proper sorting (e.g., "patch-10" -> 10)
@@ -157,7 +157,7 @@ defmodule Diff.Storage.Local do
157157
end
158158

159159
defp patch_key(package, from_version, to_version, hash, patch_id) do
160-
"#{package}-#{from_version}-#{to_version}-#{hash}-#{patch_id}.html"
160+
"#{package}-#{from_version}-#{to_version}-#{hash}-#{patch_id}.patch"
161161
end
162162

163163
defp metadata_key(package, from_version, to_version, hash) do

lib/diff_web/live/diff_live_view.ex

Lines changed: 153 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -281,13 +281,12 @@ defmodule DiffWeb.DiffLiveView do
281281
case element do
282282
{:ok, patch} ->
283283
if patch.chunks != [] do
284-
patch_html =
285-
Phoenix.View.render_to_string(DiffWeb.RenderView, "patch.html", patch: patch)
286-
|> sanitize_utf8()
287-
284+
# Store raw patch data directly - convert struct to map and sanitize for JSON encoding
285+
sanitized_patch = sanitize_patch_content(patch)
286+
patch_data = Jason.encode!(struct_to_map(sanitized_patch))
288287
patch_id = "patch-#{index}"
289288

290-
Diff.Storage.put_patch(package, from, to, patch_id, patch_html)
289+
Diff.Storage.put_patch(package, from, to, patch_id, patch_data)
291290

292291
additions = count_lines(patch, "+")
293292
deletions = count_lines(patch, "-")
@@ -306,13 +305,11 @@ defmodule DiffWeb.DiffLiveView do
306305
end
307306

308307
{:too_large, file_path} ->
309-
too_large_html =
310-
Phoenix.View.render_to_string(DiffWeb.RenderView, "too_large.html", file: file_path)
311-
|> sanitize_utf8()
312-
308+
# Store raw too_large data directly
309+
too_large_data = Jason.encode!(%{type: "too_large", file: file_path})
313310
patch_id = "patch-#{index}"
314311

315-
Diff.Storage.put_patch(package, from, to, patch_id, too_large_html)
312+
Diff.Storage.put_patch(package, from, to, patch_id, too_large_data)
316313

317314
updated_metadata = %{
318315
acc_metadata
@@ -390,41 +387,171 @@ defmodule DiffWeb.DiffLiveView do
390387
defp parse_version(input), do: Version.parse(input)
391388

392389
defp sanitize_utf8(content) when is_binary(content) do
393-
# Replace invalid UTF-8 bytes with replacement character
394390
case String.valid?(content) do
395391
true ->
396392
content
397393

398394
false ->
399-
# Convert to UTF-8, replacing invalid bytes
395+
# Multiple fallback strategies for invalid UTF-8
400396
content
401-
|> :unicode.characters_to_binary(:latin1, :utf8)
402-
|> case do
403-
result when is_binary(result) ->
404-
result
405-
406-
_ ->
407-
# If conversion fails, scrub invalid bytes
408-
content
409-
|> String.codepoints()
410-
|> Enum.filter(&String.valid?/1)
411-
|> Enum.join()
412-
end
397+
|> sanitize_invalid_bytes()
413398
end
414399
end
415400

416401
defp sanitize_utf8(content), do: content
417402

403+
defp sanitize_invalid_bytes(content) do
404+
# Try different encoding conversions and fallbacks
405+
cond do
406+
# Try converting from common encodings
407+
latin1_result = safe_unicode_convert(content, :latin1, :utf8) ->
408+
latin1_result
409+
410+
# Try converting from ISO-8859-1
411+
iso_result = safe_unicode_convert(content, {:encoding, :latin1}, :utf8) ->
412+
iso_result
413+
414+
# Last resort: replace invalid bytes with replacement character
415+
true ->
416+
content
417+
|> :binary.bin_to_list()
418+
# Replace high bytes with '?'
419+
|> Enum.map(fn byte -> if byte > 127, do: 63, else: byte end)
420+
|> :binary.list_to_bin()
421+
end
422+
end
423+
424+
defp safe_unicode_convert(content, from, to) do
425+
case :unicode.characters_to_binary(content, from, to) do
426+
result when is_binary(result) -> result
427+
_ -> nil
428+
end
429+
rescue
430+
_ -> nil
431+
end
432+
418433
defp load_patch_content(package, from, to, patch_id) do
419434
case Diff.Storage.get_patch(package, from, to, patch_id) do
420-
{:ok, content} ->
421-
sanitize_utf8(content)
435+
{:ok, patch_json} ->
436+
case Jason.decode(patch_json) do
437+
{:ok, %{"type" => "too_large", "file" => file_path}} ->
438+
# Handle too_large patches
439+
Phoenix.View.render_to_string(DiffWeb.RenderView, "too_large.html", file: file_path)
440+
|> sanitize_utf8()
441+
442+
{:ok, patch_data} when is_map(patch_data) ->
443+
# Handle normal patches - raw patch data stored directly as map
444+
# Check if this looks like a patch (has expected keys)
445+
if Map.has_key?(patch_data, "chunks") or Map.has_key?(patch_data, :chunks) do
446+
Phoenix.View.render_to_string(DiffWeb.RenderView, "patch.html",
447+
patch: atomize_patch(patch_data)
448+
)
449+
|> sanitize_utf8()
450+
else
451+
"<div class='patch-error'>Invalid patch format</div>"
452+
end
453+
454+
{:error, _} ->
455+
"<div class='patch-error'>Invalid patch data</div>"
456+
end
422457

423458
{:error, _reason} ->
424459
"<div class='patch-error'>Failed to load patch</div>"
425460
end
426461
end
427462

463+
# Convert string keys back to atom keys for patch rendering
464+
# Also handle converting lists back to tuples where needed
465+
defp atomize_patch(patch) when is_map(patch) do
466+
patch
467+
|> Enum.map(fn {k, v} ->
468+
atom_key = if is_binary(k), do: String.to_existing_atom(k), else: k
469+
470+
converted_value =
471+
case {atom_key, v} do
472+
# Convert index field back to tuple (was {nil, nil, nil} originally)
473+
{:index, [a, b, c]} -> {a, b, c}
474+
{"index", [a, b, c]} -> {a, b, c}
475+
_ -> atomize_patch(v)
476+
end
477+
478+
{atom_key, converted_value}
479+
end)
480+
|> Map.new()
481+
rescue
482+
ArgumentError ->
483+
# If atom doesn't exist, return original map with string keys
484+
patch
485+
|> Enum.map(fn {k, v} ->
486+
converted_value =
487+
case {k, v} do
488+
{"index", [a, b, c]} -> {a, b, c}
489+
_ -> atomize_patch(v)
490+
end
491+
492+
{k, converted_value}
493+
end)
494+
|> Map.new()
495+
end
496+
497+
defp atomize_patch(list) when is_list(list) do
498+
Enum.map(list, &atomize_patch/1)
499+
end
500+
501+
defp atomize_patch(value), do: value
502+
503+
# Convert structs to maps recursively for JSON encoding
504+
defp struct_to_map(%{__struct__: _} = struct) do
505+
struct
506+
|> Map.from_struct()
507+
|> Enum.map(fn {k, v} -> {k, struct_to_map(v)} end)
508+
|> Map.new()
509+
end
510+
511+
defp struct_to_map(tuple) when is_tuple(tuple) do
512+
# Convert tuples to lists for JSON compatibility
513+
tuple
514+
|> Tuple.to_list()
515+
|> Enum.map(&struct_to_map/1)
516+
end
517+
518+
defp struct_to_map(list) when is_list(list) do
519+
Enum.map(list, &struct_to_map/1)
520+
end
521+
522+
defp struct_to_map(map) when is_map(map) do
523+
map
524+
|> Enum.map(fn {k, v} -> {k, struct_to_map(v)} end)
525+
|> Map.new()
526+
end
527+
528+
defp struct_to_map(value), do: value
529+
530+
# Sanitize all binary content in patch data to prevent JSON encoding errors
531+
defp sanitize_patch_content(%{__struct__: _} = struct) do
532+
struct
533+
|> Map.from_struct()
534+
|> Enum.map(fn {k, v} -> {k, sanitize_patch_content(v)} end)
535+
|> Map.new()
536+
|> then(fn map -> struct(struct.__struct__, map) end)
537+
end
538+
539+
defp sanitize_patch_content(list) when is_list(list) do
540+
Enum.map(list, &sanitize_patch_content/1)
541+
end
542+
543+
defp sanitize_patch_content(map) when is_map(map) do
544+
map
545+
|> Enum.map(fn {k, v} -> {k, sanitize_patch_content(v)} end)
546+
|> Map.new()
547+
end
548+
549+
defp sanitize_patch_content(binary) when is_binary(binary) do
550+
sanitize_utf8(binary)
551+
end
552+
553+
defp sanitize_patch_content(value), do: value
554+
428555
defp parse_diff(diff) do
429556
case String.split(diff, ":", trim: true) do
430557
[app, from, to] -> {app, from, to, build_url(app, from, to)}

0 commit comments

Comments
 (0)