{
  "raw_dir": "data/raw",
  "max_files": 6,
  "datasets": [
    {
      "id": "autocast",
      "file_count": 1,
      "total_bytes": 2016865,
      "sampled_files": [
        {
          "path": "data/raw/autocast/repo/competition/autocast_competition_test_set.json",
          "format": "json",
          "bytes": 2016865,
          "rows": null,
          "columns": []
        }
      ]
    },
    {
      "id": "closed_polymarket_2025h1",
      "file_count": 1,
      "total_bytes": 2673120,
      "sampled_files": [
        {
          "path": "data/raw/closed_polymarket_2025h1/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 2673120,
          "rows": 31627,
          "columns": [
            "condition_id",
            "market_slug",
            "end_date_iso",
            "category",
            "minimum_tick_size",
            "minimum_order_size",
            "liquidity",
            "volume"
          ]
        }
      ]
    },
    {
      "id": "forecast_snapshots_kalshi",
      "file_count": 3,
      "total_bytes": 13107576,
      "sampled_files": [
        {
          "path": "data/raw/forecast_snapshots_kalshi/config.json",
          "format": "json",
          "bytes": 347,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/forecast_snapshots_kalshi/snapshot_dataset.csv",
          "format": "csv",
          "bytes": 12614878,
          "rows": null,
          "columns": [
            "market_source",
            "market_id",
            "question",
            "question_type",
            "unit",
            "url",
            "description",
            "resolution_criteria",
            "categories",
            "tags",
            "snapshot_time",
            "snapshot_datetime",
            "close_time",
            "close_datetime",
            "days_until_close",
            "resolution_time",
            "community_pred_now",
            "community_pred_1day",
            "community_pred_3day",
            "community_pred_1week",
            "community_pred_2week",
            "community_pred_1month",
            "pred_1week_is_resolution",
            "pred_1month_is_resolution",
            "resolution",
            "resolution_numeric",
            "model_pred_now",
            "outcomes"
          ]
        },
        {
          "path": "data/raw/forecast_snapshots_kalshi/snapshot_dataset.parquet",
          "format": "parquet",
          "bytes": 492351,
          "rows": 20259,
          "columns": [
            "market_source",
            "market_id",
            "question",
            "question_type",
            "unit",
            "url",
            "description",
            "resolution_criteria",
            "categories",
            "tags",
            "snapshot_time",
            "snapshot_datetime",
            "close_time",
            "close_datetime",
            "days_until_close",
            "resolution_time",
            "community_pred_now",
            "community_pred_1day",
            "community_pred_3day",
            "community_pred_1week",
            "community_pred_2week",
            "community_pred_1month",
            "pred_1week_is_resolution",
            "pred_1month_is_resolution",
            "resolution",
            "resolution_numeric",
            "model_pred_now",
            "outcomes"
          ]
        }
      ]
    },
    {
      "id": "forecast_snapshots_metaculus_large",
      "file_count": 3,
      "total_bytes": 6188957,
      "sampled_files": [
        {
          "path": "data/raw/forecast_snapshots_metaculus_large/config.json",
          "format": "json",
          "bytes": 339,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/forecast_snapshots_metaculus_large/snapshot_dataset.csv",
          "format": "csv",
          "bytes": 5653170,
          "rows": null,
          "columns": [
            "market_source",
            "market_id",
            "question",
            "question_type",
            "unit",
            "url",
            "description",
            "resolution_criteria",
            "categories",
            "tags",
            "snapshot_time",
            "snapshot_datetime",
            "close_time",
            "close_datetime",
            "days_until_close",
            "resolution_time",
            "community_pred_now",
            "community_pred_1day",
            "community_pred_3day",
            "community_pred_1week",
            "community_pred_2week",
            "community_pred_1month",
            "pred_1week_is_resolution",
            "pred_1month_is_resolution",
            "resolution",
            "resolution_numeric",
            "forecaster_count_now",
            "range_min",
            "range_max",
            "zero_point",
            "open_lower_bound",
            "open_upper_bound",
            "model_pred_now",
            "outcomes"
          ]
        },
        {
          "path": "data/raw/forecast_snapshots_metaculus_large/snapshot_dataset.parquet",
          "format": "parquet",
          "bytes": 535448,
          "rows": 1998,
          "columns": [
            "market_source",
            "market_id",
            "question",
            "question_type",
            "unit",
            "url",
            "description",
            "resolution_criteria",
            "categories",
            "tags",
            "snapshot_time",
            "snapshot_datetime",
            "close_time",
            "close_datetime",
            "days_until_close",
            "resolution_time",
            "community_pred_now",
            "community_pred_1day",
            "community_pred_3day",
            "community_pred_1week",
            "community_pred_2week",
            "community_pred_1month",
            "pred_1week_is_resolution",
            "pred_1month_is_resolution",
            "resolution",
            "resolution_numeric",
            "forecaster_count_now",
            "range_min",
            "range_max",
            "zero_point",
            "open_lower_bound",
            "open_upper_bound",
            "model_pred_now",
            "outcomes"
          ]
        }
      ]
    },
    {
      "id": "forecast_snapshots_metaculus_small",
      "file_count": 3,
      "total_bytes": 605024,
      "sampled_files": [
        {
          "path": "data/raw/forecast_snapshots_metaculus_small/config.json",
          "format": "json",
          "bytes": 383,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/forecast_snapshots_metaculus_small/snapshot_dataset.csv",
          "format": "csv",
          "bytes": 447601,
          "rows": null,
          "columns": [
            "market_source",
            "market_id",
            "question",
            "question_type",
            "unit",
            "url",
            "description",
            "resolution_criteria",
            "categories",
            "tags",
            "snapshot_time",
            "snapshot_datetime",
            "close_time",
            "close_datetime",
            "days_until_close",
            "resolution_time",
            "community_pred_now",
            "community_pred_1day",
            "community_pred_3day",
            "community_pred_1week",
            "community_pred_2week",
            "community_pred_1month",
            "pred_1week_is_resolution",
            "pred_1month_is_resolution",
            "resolution",
            "resolution_numeric",
            "forecaster_count_now",
            "range_min",
            "range_max",
            "zero_point",
            "open_lower_bound",
            "open_upper_bound",
            "model_pred_now",
            "outcomes"
          ]
        },
        {
          "path": "data/raw/forecast_snapshots_metaculus_small/snapshot_dataset.parquet",
          "format": "parquet",
          "bytes": 157040,
          "rows": 154,
          "columns": [
            "market_source",
            "market_id",
            "question",
            "question_type",
            "unit",
            "url",
            "description",
            "resolution_criteria",
            "categories",
            "tags",
            "snapshot_time",
            "snapshot_datetime",
            "close_time",
            "close_datetime",
            "days_until_close",
            "resolution_time",
            "community_pred_now",
            "community_pred_1day",
            "community_pred_3day",
            "community_pred_1week",
            "community_pred_2week",
            "community_pred_1month",
            "pred_1week_is_resolution",
            "pred_1month_is_resolution",
            "resolution",
            "resolution_numeric",
            "forecaster_count_now",
            "range_min",
            "range_max",
            "zero_point",
            "open_lower_bound",
            "open_upper_bound",
            "model_pred_now",
            "outcomes"
          ]
        }
      ]
    },
    {
      "id": "forecastbench",
      "file_count": 41,
      "total_bytes": 121849554,
      "sampled_files": [
        {
          "path": "data/raw/forecastbench/datasets/forecast_sets/2024-07-21/2024-07-21.ForecastBench.human_public_individual.json",
          "format": "json",
          "bytes": 23526935,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/forecastbench/datasets/forecast_sets/2024-07-21/2024-07-21.ForecastBench.human_super_individual.json",
          "format": "json",
          "bytes": 4595808,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/forecastbench/datasets/question_sets/2024-07-21-human.json",
          "format": "json",
          "bytes": 553367,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/forecastbench/datasets/question_sets/2024-07-21-llm.json",
          "format": "json",
          "bytes": 4831631,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/forecastbench/datasets/question_sets/2025-03-02-llm.json",
          "format": "json",
          "bytes": 4762520,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/forecastbench/datasets/question_sets/2025-03-16-llm.json",
          "format": "json",
          "bytes": 4773527,
          "rows": null,
          "columns": []
        }
      ]
    },
    {
      "id": "futurex_online",
      "file_count": 1,
      "total_bytes": 23727,
      "sampled_files": [
        {
          "path": "data/raw/futurex_online/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 23727,
          "rows": 76,
          "columns": [
            "id",
            "prompt",
            "end_time",
            "level",
            "en_title"
          ]
        }
      ]
    },
    {
      "id": "futurex_past",
      "file_count": 1,
      "total_bytes": 249971,
      "sampled_files": [
        {
          "path": "data/raw/futurex_past/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 249971,
          "rows": 1129,
          "columns": [
            "id",
            "prompt",
            "end_time",
            "level",
            "title",
            "ground_truth"
          ]
        }
      ]
    },
    {
      "id": "halawi_llm_forecasting",
      "file_count": 0,
      "total_bytes": 0,
      "sampled_files": []
    },
    {
      "id": "ir_event_forecasting_sample",
      "file_count": 2,
      "total_bytes": 87000,
      "sampled_files": [
        {
          "path": "data/raw/ir_event_forecasting_sample/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 58712,
          "rows": 20,
          "columns": [
            "original_data",
            "original_question",
            "original_answer",
            "solution",
            "candidates",
            "reasoning_content",
            "question",
            "model_answer"
          ]
        },
        {
          "path": "data/raw/ir_event_forecasting_sample/data/valid-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 28288,
          "rows": 4,
          "columns": [
            "original_data",
            "original_question",
            "original_answer",
            "solution",
            "candidates",
            "reasoning_content",
            "question",
            "model_answer"
          ]
        }
      ]
    },
    {
      "id": "kalshi_filtered",
      "file_count": 1,
      "total_bytes": 176406,
      "sampled_files": [
        {
          "path": "data/raw/kalshi_filtered/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 176406,
          "rows": 1433,
          "columns": [
            "cutoff_time",
            "week_id",
            "ticker",
            "event_ticker",
            "title",
            "subtitle",
            "category",
            "rules_primary",
            "price_at_cutoff",
            "last_price",
            "last_price_dollars",
            "yes_bid",
            "yes_ask",
            "no_bid",
            "no_ask",
            "volume",
            "volume_24h",
            "open_interest",
            "liquidity",
            "liquidity_dollars",
            "days_until_resolution",
            "days_since_open",
            "market_duration_days",
            "open_time",
            "expected_expiration_time",
            "market_type",
            "strike_type",
            "floor_strike",
            "result",
            "settlement_value"
          ]
        }
      ]
    },
    {
      "id": "kalshi_markets",
      "file_count": 1,
      "total_bytes": 1432564,
      "sampled_files": [
        {
          "path": "data/raw/kalshi_markets/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 1432564,
          "rows": 10016,
          "columns": [
            "ticker",
            "event_ticker",
            "market_type",
            "title",
            "subtitle",
            "yes_sub_title",
            "no_sub_title",
            "open_time",
            "close_time",
            "expected_expiration_time",
            "expiration_time",
            "latest_expiration_time",
            "settlement_timer_seconds",
            "status",
            "response_price_units",
            "notional_value",
            "notional_value_dollars",
            "yes_bid",
            "yes_bid_dollars",
            "yes_ask",
            "yes_ask_dollars",
            "no_bid",
            "no_bid_dollars",
            "no_ask",
            "no_ask_dollars",
            "last_price",
            "last_price_dollars",
            "previous_yes_bid",
            "previous_yes_bid_dollars",
            "previous_yes_ask",
            "previous_yes_ask_dollars",
            "previous_price",
            "previous_price_dollars",
            "volume",
            "volume_24h",
            "liquidity",
            "liquidity_dollars",
            "open_interest",
            "result",
            "settlement_value",
            "settlement_value_dollars",
            "can_close_early",
            "expiration_value",
            "category",
            "risk_limit_cents",
            "strike_type",
            "custom_strike",
            "rules_primary",
            "rules_secondary",
            "early_close_condition",
            "tick_size",
            "floor_strike",
            "cap_strike"
          ]
        }
      ]
    },
    {
      "id": "kalshi_prop_closes",
      "file_count": 22,
      "total_bytes": 31251392,
      "sampled_files": [
        {
          "path": "data/raw/kalshi_prop_closes/prop_closes_2026-06-10.jsonl.gz",
          "format": "jsonl",
          "bytes": 1033732,
          "rows": null,
          "columns": [
            "src",
            "sport",
            "league",
            "gid",
            "mid",
            "teams",
            "start",
            "cutoff",
            "type",
            "period",
            "units",
            "desc",
            "cat",
            "limit",
            "prices",
            "ts"
          ]
        },
        {
          "path": "data/raw/kalshi_prop_closes/prop_closes_2026-06-11.jsonl.gz",
          "format": "jsonl",
          "bytes": 1064937,
          "rows": null,
          "columns": [
            "src",
            "sport",
            "league",
            "gid",
            "mid",
            "teams",
            "start",
            "cutoff",
            "type",
            "period",
            "units",
            "desc",
            "cat",
            "limit",
            "prices",
            "ts"
          ]
        },
        {
          "path": "data/raw/kalshi_prop_closes/prop_closes_2026-06-12.jsonl.gz",
          "format": "jsonl",
          "bytes": 1403193,
          "rows": null,
          "columns": [
            "src",
            "sport",
            "league",
            "gid",
            "mid",
            "teams",
            "start",
            "cutoff",
            "type",
            "period",
            "units",
            "desc",
            "cat",
            "limit",
            "prices",
            "ts"
          ]
        },
        {
          "path": "data/raw/kalshi_prop_closes/prop_closes_2026-06-13.jsonl.gz",
          "format": "jsonl",
          "bytes": 1659277,
          "rows": null,
          "columns": [
            "src",
            "sport",
            "league",
            "gid",
            "mid",
            "teams",
            "start",
            "cutoff",
            "type",
            "period",
            "units",
            "desc",
            "cat",
            "limit",
            "prices",
            "ts"
          ]
        },
        {
          "path": "data/raw/kalshi_prop_closes/prop_closes_2026-06-14.jsonl.gz",
          "format": "jsonl",
          "bytes": 1537293,
          "rows": null,
          "columns": [
            "src",
            "sport",
            "league",
            "gid",
            "mid",
            "teams",
            "start",
            "cutoff",
            "type",
            "period",
            "units",
            "desc",
            "cat",
            "limit",
            "prices",
            "ts"
          ]
        },
        {
          "path": "data/raw/kalshi_prop_closes/prop_closes_2026-06-15.jsonl.gz",
          "format": "jsonl",
          "bytes": 1324247,
          "rows": null,
          "columns": [
            "src",
            "sport",
            "league",
            "gid",
            "mid",
            "teams",
            "start",
            "cutoff",
            "type",
            "period",
            "units",
            "desc",
            "cat",
            "limit",
            "prices",
            "ts"
          ]
        }
      ]
    },
    {
      "id": "kalshi_rfq_momentum",
      "file_count": 0,
      "total_bytes": 0,
      "sampled_files": []
    },
    {
      "id": "kalshi_trades_trevorjs",
      "file_count": 20,
      "total_bytes": 5685396710,
      "sampled_files": [
        {
          "path": "data/raw/kalshi_trades_trevorjs/markets-0000.parquet",
          "format": "parquet",
          "bytes": 136002798,
          "rows": 5000000,
          "columns": [
            "ticker",
            "event_ticker",
            "market_type",
            "title",
            "yes_sub_title",
            "no_sub_title",
            "status",
            "yes_bid",
            "yes_ask",
            "no_bid",
            "no_ask",
            "last_price",
            "volume",
            "volume_24h",
            "open_interest",
            "result",
            "created_time",
            "open_time",
            "close_time"
          ]
        },
        {
          "path": "data/raw/kalshi_trades_trevorjs/markets-0001.parquet",
          "format": "parquet",
          "bytes": 364296885,
          "rows": 5000000,
          "columns": [
            "ticker",
            "event_ticker",
            "market_type",
            "title",
            "yes_sub_title",
            "no_sub_title",
            "status",
            "yes_bid",
            "yes_ask",
            "no_bid",
            "no_ask",
            "last_price",
            "volume",
            "volume_24h",
            "open_interest",
            "result",
            "created_time",
            "open_time",
            "close_time"
          ]
        },
        {
          "path": "data/raw/kalshi_trades_trevorjs/markets-0002.parquet",
          "format": "parquet",
          "bytes": 400851398,
          "rows": 5000000,
          "columns": [
            "ticker",
            "event_ticker",
            "market_type",
            "title",
            "yes_sub_title",
            "no_sub_title",
            "status",
            "yes_bid",
            "yes_ask",
            "no_bid",
            "no_ask",
            "last_price",
            "volume",
            "volume_24h",
            "open_interest",
            "result",
            "created_time",
            "open_time",
            "close_time"
          ]
        },
        {
          "path": "data/raw/kalshi_trades_trevorjs/markets-0003.parquet",
          "format": "parquet",
          "bytes": 212771767,
          "rows": 2464713,
          "columns": [
            "ticker",
            "event_ticker",
            "market_type",
            "title",
            "yes_sub_title",
            "no_sub_title",
            "status",
            "yes_bid",
            "yes_ask",
            "no_bid",
            "no_ask",
            "last_price",
            "volume",
            "volume_24h",
            "open_interest",
            "result",
            "created_time",
            "open_time",
            "close_time"
          ]
        },
        {
          "path": "data/raw/kalshi_trades_trevorjs/trades-0000.parquet",
          "format": "parquet",
          "bytes": 313703141,
          "rows": 10000000,
          "columns": [
            "trade_id",
            "ticker",
            "count",
            "yes_price",
            "no_price",
            "taker_side",
            "created_time"
          ]
        },
        {
          "path": "data/raw/kalshi_trades_trevorjs/trades-0001.parquet",
          "format": "parquet",
          "bytes": 288411075,
          "rows": 9999678,
          "columns": [
            "trade_id",
            "ticker",
            "count",
            "yes_price",
            "no_price",
            "taker_side",
            "created_time"
          ]
        }
      ]
    },
    {
      "id": "kalshi_trades_wmitch",
      "file_count": 2,
      "total_bytes": 276868855,
      "sampled_files": [
        {
          "path": "data/raw/kalshi_trades_wmitch/data/train-00000-of-00002.parquet",
          "format": "parquet",
          "bytes": 138396419,
          "rows": 2538256,
          "columns": [
            "trade_id",
            "ticker",
            "count",
            "created_time",
            "yes_price",
            "no_price",
            "taker_side",
            "market_ticker"
          ]
        },
        {
          "path": "data/raw/kalshi_trades_wmitch/data/train-00001-of-00002.parquet",
          "format": "parquet",
          "bytes": 138472436,
          "rows": 2538255,
          "columns": [
            "trade_id",
            "ticker",
            "count",
            "created_time",
            "yes_price",
            "no_price",
            "taker_side",
            "market_ticker"
          ]
        }
      ]
    },
    {
      "id": "kalshibench_v1",
      "file_count": 1,
      "total_bytes": 162550,
      "sampled_files": [
        {
          "path": "data/raw/kalshibench_v1/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 162550,
          "rows": 894,
          "columns": [
            "id",
            "question",
            "description",
            "category",
            "close_time",
            "ground_truth",
            "market_probability",
            "series_ticker",
            "source"
          ]
        }
      ]
    },
    {
      "id": "kalshibench_v2",
      "file_count": 1,
      "total_bytes": 202676,
      "sampled_files": [
        {
          "path": "data/raw/kalshibench_v2/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 202676,
          "rows": 1531,
          "columns": [
            "id",
            "question",
            "description",
            "category",
            "close_time",
            "ground_truth",
            "market_probability",
            "series_ticker",
            "source"
          ]
        }
      ]
    },
    {
      "id": "metaculus_binary_chandak",
      "file_count": 1,
      "total_bytes": 3537445,
      "sampled_files": [
        {
          "path": "data/raw/metaculus_binary_chandak/train/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 3537445,
          "rows": 4265,
          "columns": [
            "date_resolve_at",
            "date_begin",
            "extracted_urls",
            "question_type",
            "url",
            "background",
            "resolution_criteria",
            "is_resolved",
            "date_close",
            "question",
            "data_source",
            "resolution",
            "nr_forecasters"
          ]
        }
      ]
    },
    {
      "id": "metaculus_binary_jijivski",
      "file_count": 2,
      "total_bytes": 89795,
      "sampled_files": [
        {
          "path": "data/raw/metaculus_binary_jijivski/metaculus_binary.json",
          "format": "json",
          "bytes": 0,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/metaculus_binary_jijivski/test.json",
          "format": "json",
          "bytes": 89795,
          "rows": null,
          "columns": [
            "question",
            "possibilities",
            "label",
            "description",
            "created_time",
            "resolve_time"
          ]
        }
      ]
    },
    {
      "id": "mirai",
      "file_count": 2,
      "total_bytes": 107011,
      "sampled_files": [
        {
          "path": "data/raw/mirai/repo/examples/outputs_raw/gpt-4o_react_block.json",
          "format": "json",
          "bytes": 58033,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/mirai/repo/examples/outputs_raw/gpt-4o_react_func.json",
          "format": "json",
          "bytes": 48978,
          "rows": null,
          "columns": []
        }
      ]
    },
    {
      "id": "mlb_polymarket_kalshi_matched_sample",
      "file_count": 2,
      "total_bytes": 132839,
      "sampled_files": [
        {
          "path": "data/raw/mlb_polymarket_kalshi_matched_sample/mlb_matched_sample_ARI-MIN_2026-06-21.csv",
          "format": "csv",
          "bytes": 110091,
          "rows": null,
          "columns": [
            "ts",
            "date",
            "game",
            "team",
            "poly_bid",
            "poly_ask",
            "poly_mid",
            "kalshi_yes_bid",
            "kalshi_yes_ask",
            "kalshi_mid",
            "xvenue_spread",
            "winner",
            "won",
            "settled"
          ]
        },
        {
          "path": "data/raw/mlb_polymarket_kalshi_matched_sample/mlb_matched_sample_ARI-MIN_2026-06-21.parquet",
          "format": "parquet",
          "bytes": 22748,
          "rows": 1204,
          "columns": [
            "ts",
            "date",
            "game",
            "team",
            "poly_bid",
            "poly_ask",
            "poly_mid",
            "kalshi_yes_bid",
            "kalshi_yes_ask",
            "kalshi_mid",
            "xvenue_spread",
            "winner",
            "won",
            "settled"
          ]
        }
      ]
    },
    {
      "id": "openforesight",
      "file_count": 8,
      "total_bytes": 477959595,
      "sampled_files": [
        {
          "path": "data/raw/openforesight/data/aljazeera2026Q1-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 1362638,
          "rows": 330,
          "columns": [
            "qid",
            "question_title",
            "background",
            "resolution_criteria",
            "answer",
            "answer_type",
            "url",
            "article_title",
            "article_description",
            "article_maintext",
            "article_publish_date",
            "article_modify_date",
            "article_download_date",
            "data_source",
            "news_source",
            "resolution_date",
            "question_start_date",
            "prompt",
            "prompt_without_retrieval"
          ]
        },
        {
          "path": "data/raw/openforesight/data/aljazeeraLate2025-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 4652896,
          "rows": 491,
          "columns": [
            "qid",
            "question_title",
            "background",
            "resolution_criteria",
            "answer",
            "answer_type",
            "url",
            "article_title",
            "article_description",
            "article_maintext",
            "article_publish_date",
            "article_modify_date",
            "article_download_date",
            "data_source",
            "news_source",
            "resolution_date",
            "question_start_date",
            "prompt",
            "prompt_without_retrieval"
          ]
        },
        {
          "path": "data/raw/openforesight/data/skysports2025-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 6901245,
          "rows": 1788,
          "columns": [
            "qid",
            "question_title",
            "background",
            "resolution_criteria",
            "answer_type",
            "answer",
            "url",
            "article_maintext",
            "article_publish_date",
            "article_modify_date",
            "article_download_date",
            "article_description",
            "article_title",
            "data_source",
            "news_source",
            "resolution_date",
            "question_start_date",
            "prompt",
            "prompt_without_retrieval"
          ]
        },
        {
          "path": "data/raw/openforesight/data/test-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 2793065,
          "rows": 302,
          "columns": [
            "qid",
            "question_title",
            "background",
            "resolution_criteria",
            "answer_type",
            "answer",
            "url",
            "article_maintext",
            "article_publish_date",
            "article_modify_date",
            "article_download_date",
            "article_description",
            "article_title",
            "data_source",
            "news_source",
            "resolution_date",
            "question_start_date",
            "prompt",
            "prompt_without_retrieval"
          ]
        },
        {
          "path": "data/raw/openforesight/data/train-00000-of-00003.parquet",
          "format": "parquet",
          "bytes": 154156215,
          "rows": 17395,
          "columns": [
            "qid",
            "question_title",
            "background",
            "resolution_criteria",
            "answer_type",
            "answer",
            "url",
            "article_maintext",
            "article_publish_date",
            "article_modify_date",
            "article_download_date",
            "article_description",
            "article_title",
            "data_source",
            "news_source",
            "resolution_date",
            "question_start_date",
            "prompt",
            "prompt_without_retrieval"
          ]
        },
        {
          "path": "data/raw/openforesight/data/train-00001-of-00003.parquet",
          "format": "parquet",
          "bytes": 157010712,
          "rows": 17395,
          "columns": [
            "qid",
            "question_title",
            "background",
            "resolution_criteria",
            "answer_type",
            "answer",
            "url",
            "article_maintext",
            "article_publish_date",
            "article_modify_date",
            "article_download_date",
            "article_description",
            "article_title",
            "data_source",
            "news_source",
            "resolution_date",
            "question_start_date",
            "prompt",
            "prompt_without_retrieval"
          ]
        }
      ]
    },
    {
      "id": "polybench",
      "file_count": 0,
      "total_bytes": 0,
      "sampled_files": []
    },
    {
      "id": "polymarket_10000",
      "file_count": 1,
      "total_bytes": 4750512,
      "sampled_files": [
        {
          "path": "data/raw/polymarket_10000/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 4750512,
          "rows": 10000,
          "columns": [
            "enable_order_book",
            "active",
            "closed",
            "archived",
            "accepting_orders",
            "accepting_order_timestamp",
            "minimum_order_size",
            "minimum_tick_size",
            "condition_id",
            "question_id",
            "question",
            "description",
            "market_slug",
            "end_date_iso",
            "game_start_time",
            "seconds_delay",
            "fpmm",
            "maker_base_fee",
            "taker_base_fee",
            "notifications_enabled",
            "neg_risk",
            "neg_risk_market_id",
            "neg_risk_request_id",
            "icon",
            "image",
            "rewards",
            "is_50_50_outcome",
            "tokens",
            "tags"
          ]
        }
      ]
    },
    {
      "id": "polymarket_5min_crypto_updown",
      "file_count": 14,
      "total_bytes": 725013472,
      "sampled_files": [
        {
          "path": "data/raw/polymarket_5min_crypto_updown/bnb_markets.parquet",
          "format": "parquet",
          "bytes": 2994440,
          "rows": 12259,
          "columns": [
            "condition_id",
            "event_id",
            "slug",
            "market_start",
            "market_end",
            "recorded_at",
            "token_up",
            "token_down",
            "volume",
            "liquidity",
            "outcome",
            "n_ticks"
          ]
        },
        {
          "path": "data/raw/polymarket_5min_crypto_updown/bnb_ticks.parquet",
          "format": "parquet",
          "bytes": 76690643,
          "rows": 3677623,
          "columns": [
            "condition_id",
            "t",
            "ts_utc",
            "bu",
            "au",
            "bd",
            "ad",
            "su",
            "sd",
            "sau",
            "sad",
            "du",
            "dd"
          ]
        },
        {
          "path": "data/raw/polymarket_5min_crypto_updown/btc_markets.parquet",
          "format": "parquet",
          "bytes": 3921352,
          "rows": 15682,
          "columns": [
            "condition_id",
            "event_id",
            "slug",
            "market_start",
            "market_end",
            "recorded_at",
            "token_up",
            "token_down",
            "volume",
            "liquidity",
            "outcome",
            "n_ticks"
          ]
        },
        {
          "path": "data/raw/polymarket_5min_crypto_updown/btc_ticks.parquet",
          "format": "parquet",
          "bytes": 182475803,
          "rows": 4704518,
          "columns": [
            "condition_id",
            "t",
            "ts_utc",
            "bu",
            "au",
            "bd",
            "ad",
            "su",
            "sd",
            "sau",
            "sad",
            "du",
            "dd"
          ]
        },
        {
          "path": "data/raw/polymarket_5min_crypto_updown/doge_markets.parquet",
          "format": "parquet",
          "bytes": 2999540,
          "rows": 12259,
          "columns": [
            "condition_id",
            "event_id",
            "slug",
            "market_start",
            "market_end",
            "recorded_at",
            "token_up",
            "token_down",
            "volume",
            "liquidity",
            "outcome",
            "n_ticks"
          ]
        },
        {
          "path": "data/raw/polymarket_5min_crypto_updown/doge_ticks.parquet",
          "format": "parquet",
          "bytes": 80060676,
          "rows": 3677623,
          "columns": [
            "condition_id",
            "t",
            "ts_utc",
            "bu",
            "au",
            "bd",
            "ad",
            "su",
            "sd",
            "sau",
            "sad",
            "du",
            "dd"
          ]
        }
      ]
    },
    {
      "id": "polymarket_clean",
      "file_count": 1,
      "total_bytes": 81935,
      "sampled_files": [
        {
          "path": "data/raw/polymarket_clean/data/train-00000-of-00001.parquet",
          "format": "parquet",
          "bytes": 81935,
          "rows": 1000,
          "columns": [
            "id",
            "amount",
            "shares",
            "userId",
            "outcome",
            "dpmShares",
            "probAfter",
            "contractId",
            "probBefore",
            "createdTime"
          ]
        }
      ]
    },
    {
      "id": "polymarket_dataset_bbasavar",
      "file_count": 1,
      "total_bytes": 94707271,
      "sampled_files": [
        {
          "path": "data/raw/polymarket_dataset_bbasavar/fine_tune.jsonl",
          "format": "jsonl",
          "bytes": 94707271,
          "rows": null,
          "columns": [
            "instruction",
            "input",
            "output"
          ]
        }
      ]
    },
    {
      "id": "polymarket_kalshi_scoresync_sample",
      "file_count": 3,
      "total_bytes": 186960,
      "sampled_files": [
        {
          "path": "data/raw/polymarket_kalshi_scoresync_sample/DATA_CARD.json",
          "format": "json",
          "bytes": 1595,
          "rows": null,
          "columns": []
        },
        {
          "path": "data/raw/polymarket_kalshi_scoresync_sample/kalshi_scoresync_PHI_at_TOR_2026-01-11.parquet",
          "format": "parquet",
          "bytes": 56948,
          "rows": 1844,
          "columns": [
            "timestamp",
            "ticker",
            "title",
            "game_id",
            "home_team",
            "away_team",
            "home_score",
            "away_score",
            "score_diff",
            "period",
            "time_remaining",
            "game_state",
            "yes_bid",
            "yes_ask",
            "mid",
            "spread",
            "spread_cents",
            "yes_bid_size",
            "yes_ask_size",
            "min_size",
            "open_interest",
            "volume_24h",
            "liquidity_score",
            "tradable_now",
            "is_burst_mode",
            "datetime_utc"
          ]
        },
        {
          "path": "data/raw/polymarket_kalshi_scoresync_sample/kalshi_scoresync_PHI_at_TOR_HIGHLIGHTS.csv",
          "format": "csv",
          "bytes": 128417,
          "rows": null,
          "columns": [
            "datetime_utc",
            "period",
            "time_remaining",
            "game_state",
            "home_team",
            "away_team",
            "home_score",
            "away_score",
            "score_diff",
            "score_changed",
            "yes_bid",
            "yes_ask",
            "mid",
            "spread",
            "yes_bid_size",
            "yes_ask_size"
          ]
        }
      ]
    },
    {
      "id": "polymarket_minute_parquet",
      "file_count": 18,
      "total_bytes": 1432129480,
      "sampled_files": [
        {
          "path": "data/raw/polymarket_minute_parquet/data/7397d2df-6622-4860-8797-6803b5740470.parquet",
          "format": "parquet",
          "bytes": 8043,
          "rows": 342,
          "columns": [
            "timestamp",
            "price",
            "token_id",
            "side",
            "market_id",
            "event_id",
            "question"
          ]
        },
        {
          "path": "data/raw/polymarket_minute_parquet/data/train-00000-of-00017.parquet",
          "format": "parquet",
          "bytes": 44720136,
          "rows": 6008518,
          "columns": [
            "timestamp",
            "price",
            "token_id",
            "side",
            "market_id",
            "event_id",
            "question"
          ]
        },
        {
          "path": "data/raw/polymarket_minute_parquet/data/train-00001-of-00017.parquet",
          "format": "parquet",
          "bytes": 44566730,
          "rows": 6008517,
          "columns": [
            "timestamp",
            "price",
            "token_id",
            "side",
            "market_id",
            "event_id",
            "question"
          ]
        },
        {
          "path": "data/raw/polymarket_minute_parquet/data/train-00002-of-00017.parquet",
          "format": "parquet",
          "bytes": 89573575,
          "rows": 12017035,
          "columns": [
            "timestamp",
            "price",
            "token_id",
            "side",
            "market_id",
            "event_id",
            "question"
          ]
        },
        {
          "path": "data/raw/polymarket_minute_parquet/data/train-00003-of-00017.parquet",
          "format": "parquet",
          "bytes": 89352774,
          "rows": 12017035,
          "columns": [
            "timestamp",
            "price",
            "token_id",
            "side",
            "market_id",
            "event_id",
            "question"
          ]
        },
        {
          "path": "data/raw/polymarket_minute_parquet/data/train-00004-of-00017.parquet",
          "format": "parquet",
          "bytes": 89417491,
          "rows": 12017035,
          "columns": [
            "timestamp",
            "price",
            "token_id",
            "side",
            "market_id",
            "event_id",
            "question"
          ]
        }
      ]
    },
    {
      "id": "prophet",
      "file_count": 0,
      "total_bytes": 0,
      "sampled_files": []
    },
    {
      "id": "prophet_arena_100",
      "file_count": 1,
      "total_bytes": 2115113,
      "sampled_files": [
        {
          "path": "data/raw/prophet_arena_100/subset_data_100.csv",
          "format": "csv",
          "bytes": 2115113,
          "rows": null,
          "columns": [
            "event_ticker",
            "title",
            "category",
            "markets",
            "close_time",
            "market_outcome",
            "sources",
            "market_info",
            "snapshot_time",
            "submission_id",
            "submission_created_at"
          ]
        }
      ]
    },
    {
      "id": "prophet_arena_1200",
      "file_count": 1,
      "total_bytes": 8988523,
      "sampled_files": [
        {
          "path": "data/raw/prophet_arena_1200/subset_data_1200.csv",
          "format": "csv",
          "bytes": 8988523,
          "rows": null,
          "columns": [
            "submission_id",
            "event_ticker",
            "title",
            "snapshot_time",
            "close_time",
            "market_data",
            "market_outcome",
            "category",
            "markets",
            "augmented_title",
            "rules",
            "sources"
          ]
        }
      ]
    }
  ]
}