@@ -281,13 +281,12 @@ defmodule DiffWeb.DiffLiveView do
281281 case element do
282282 { :ok , patch } ->
283283 if patch . chunks != [ ] do
284- patch_html =
285- Phoenix.View . render_to_string ( DiffWeb.RenderView , "patch.html" , patch: patch )
286- |> sanitize_utf8 ( )
287-
284+ # Store raw patch data directly - convert struct to map and sanitize for JSON encoding
285+ sanitized_patch = sanitize_patch_content ( patch )
286+ patch_data = Jason . encode! ( struct_to_map ( sanitized_patch ) )
288287 patch_id = "patch-#{ index } "
289288
290- Diff.Storage . put_patch ( package , from , to , patch_id , patch_html )
289+ Diff.Storage . put_patch ( package , from , to , patch_id , patch_data )
291290
292291 additions = count_lines ( patch , "+" )
293292 deletions = count_lines ( patch , "-" )
@@ -306,13 +305,11 @@ defmodule DiffWeb.DiffLiveView do
306305 end
307306
308307 { :too_large , file_path } ->
309- too_large_html =
310- Phoenix.View . render_to_string ( DiffWeb.RenderView , "too_large.html" , file: file_path )
311- |> sanitize_utf8 ( )
312-
308+ # Store raw too_large data directly
309+ too_large_data = Jason . encode! ( % { type: "too_large" , file: file_path } )
313310 patch_id = "patch-#{ index } "
314311
315- Diff.Storage . put_patch ( package , from , to , patch_id , too_large_html )
312+ Diff.Storage . put_patch ( package , from , to , patch_id , too_large_data )
316313
317314 updated_metadata = % {
318315 acc_metadata
@@ -390,41 +387,171 @@ defmodule DiffWeb.DiffLiveView do
390387 defp parse_version ( input ) , do: Version . parse ( input )
391388
392389 defp sanitize_utf8 ( content ) when is_binary ( content ) do
393- # Replace invalid UTF-8 bytes with replacement character
394390 case String . valid? ( content ) do
395391 true ->
396392 content
397393
398394 false ->
399- # Convert to UTF-8, replacing invalid bytes
395+ # Multiple fallback strategies for invalid UTF-8
400396 content
401- |> :unicode . characters_to_binary ( :latin1 , :utf8 )
402- |> case do
403- result when is_binary ( result ) ->
404- result
405-
406- _ ->
407- # If conversion fails, scrub invalid bytes
408- content
409- |> String . codepoints ( )
410- |> Enum . filter ( & String . valid? / 1 )
411- |> Enum . join ( )
412- end
397+ |> sanitize_invalid_bytes ( )
413398 end
414399 end
415400
416401 defp sanitize_utf8 ( content ) , do: content
417402
403+ defp sanitize_invalid_bytes ( content ) do
404+ # Try different encoding conversions and fallbacks
405+ cond do
406+ # Try converting from common encodings
407+ latin1_result = safe_unicode_convert ( content , :latin1 , :utf8 ) ->
408+ latin1_result
409+
410+ # Try converting from ISO-8859-1
411+ iso_result = safe_unicode_convert ( content , { :encoding , :latin1 } , :utf8 ) ->
412+ iso_result
413+
414+ # Last resort: replace invalid bytes with replacement character
415+ true ->
416+ content
417+ |> :binary . bin_to_list ( )
418+ # Replace high bytes with '?'
419+ |> Enum . map ( fn byte -> if byte > 127 , do: 63 , else: byte end )
420+ |> :binary . list_to_bin ( )
421+ end
422+ end
423+
424+ defp safe_unicode_convert ( content , from , to ) do
425+ case :unicode . characters_to_binary ( content , from , to ) do
426+ result when is_binary ( result ) -> result
427+ _ -> nil
428+ end
429+ rescue
430+ _ -> nil
431+ end
432+
418433 defp load_patch_content ( package , from , to , patch_id ) do
419434 case Diff.Storage . get_patch ( package , from , to , patch_id ) do
420- { :ok , content } ->
421- sanitize_utf8 ( content )
435+ { :ok , patch_json } ->
436+ case Jason . decode ( patch_json ) do
437+ { :ok , % { "type" => "too_large" , "file" => file_path } } ->
438+ # Handle too_large patches
439+ Phoenix.View . render_to_string ( DiffWeb.RenderView , "too_large.html" , file: file_path )
440+ |> sanitize_utf8 ( )
441+
442+ { :ok , patch_data } when is_map ( patch_data ) ->
443+ # Handle normal patches - raw patch data stored directly as map
444+ # Check if this looks like a patch (has expected keys)
445+ if Map . has_key? ( patch_data , "chunks" ) or Map . has_key? ( patch_data , :chunks ) do
446+ Phoenix.View . render_to_string ( DiffWeb.RenderView , "patch.html" ,
447+ patch: atomize_patch ( patch_data )
448+ )
449+ |> sanitize_utf8 ( )
450+ else
451+ "<div class='patch-error'>Invalid patch format</div>"
452+ end
453+
454+ { :error , _ } ->
455+ "<div class='patch-error'>Invalid patch data</div>"
456+ end
422457
423458 { :error , _reason } ->
424459 "<div class='patch-error'>Failed to load patch</div>"
425460 end
426461 end
427462
463+ # Convert string keys back to atom keys for patch rendering
464+ # Also handle converting lists back to tuples where needed
465+ defp atomize_patch ( patch ) when is_map ( patch ) do
466+ patch
467+ |> Enum . map ( fn { k , v } ->
468+ atom_key = if is_binary ( k ) , do: String . to_existing_atom ( k ) , else: k
469+
470+ converted_value =
471+ case { atom_key , v } do
472+ # Convert index field back to tuple (was {nil, nil, nil} originally)
473+ { :index , [ a , b , c ] } -> { a , b , c }
474+ { "index" , [ a , b , c ] } -> { a , b , c }
475+ _ -> atomize_patch ( v )
476+ end
477+
478+ { atom_key , converted_value }
479+ end )
480+ |> Map . new ( )
481+ rescue
482+ ArgumentError ->
483+ # If atom doesn't exist, return original map with string keys
484+ patch
485+ |> Enum . map ( fn { k , v } ->
486+ converted_value =
487+ case { k , v } do
488+ { "index" , [ a , b , c ] } -> { a , b , c }
489+ _ -> atomize_patch ( v )
490+ end
491+
492+ { k , converted_value }
493+ end )
494+ |> Map . new ( )
495+ end
496+
497+ defp atomize_patch ( list ) when is_list ( list ) do
498+ Enum . map ( list , & atomize_patch / 1 )
499+ end
500+
501+ defp atomize_patch ( value ) , do: value
502+
503+ # Convert structs to maps recursively for JSON encoding
504+ defp struct_to_map ( % { __struct__: _ } = struct ) do
505+ struct
506+ |> Map . from_struct ( )
507+ |> Enum . map ( fn { k , v } -> { k , struct_to_map ( v ) } end )
508+ |> Map . new ( )
509+ end
510+
511+ defp struct_to_map ( tuple ) when is_tuple ( tuple ) do
512+ # Convert tuples to lists for JSON compatibility
513+ tuple
514+ |> Tuple . to_list ( )
515+ |> Enum . map ( & struct_to_map / 1 )
516+ end
517+
518+ defp struct_to_map ( list ) when is_list ( list ) do
519+ Enum . map ( list , & struct_to_map / 1 )
520+ end
521+
522+ defp struct_to_map ( map ) when is_map ( map ) do
523+ map
524+ |> Enum . map ( fn { k , v } -> { k , struct_to_map ( v ) } end )
525+ |> Map . new ( )
526+ end
527+
528+ defp struct_to_map ( value ) , do: value
529+
530+ # Sanitize all binary content in patch data to prevent JSON encoding errors
531+ defp sanitize_patch_content ( % { __struct__: _ } = struct ) do
532+ struct
533+ |> Map . from_struct ( )
534+ |> Enum . map ( fn { k , v } -> { k , sanitize_patch_content ( v ) } end )
535+ |> Map . new ( )
536+ |> then ( fn map -> struct ( struct . __struct__ , map ) end )
537+ end
538+
539+ defp sanitize_patch_content ( list ) when is_list ( list ) do
540+ Enum . map ( list , & sanitize_patch_content / 1 )
541+ end
542+
543+ defp sanitize_patch_content ( map ) when is_map ( map ) do
544+ map
545+ |> Enum . map ( fn { k , v } -> { k , sanitize_patch_content ( v ) } end )
546+ |> Map . new ( )
547+ end
548+
549+ defp sanitize_patch_content ( binary ) when is_binary ( binary ) do
550+ sanitize_utf8 ( binary )
551+ end
552+
553+ defp sanitize_patch_content ( value ) , do: value
554+
428555 defp parse_diff ( diff ) do
429556 case String . split ( diff , ":" , trim: true ) do
430557 [ app , from , to ] -> { app , from , to , build_url ( app , from , to ) }
0 commit comments